use crate::error::*;
use crate::{EpochDays, PackedTimestamp};
/// Staging record for the `YYYY-MM-DD?hh:mm` prefix decoded by the SIMD parser.
///
/// The struct is `repr(C)` and consists of eight `u16` fields, so a single
/// 128-bit vector of eight 16-bit lanes can be stored straight into it. The
/// field order therefore must not change.
#[repr(C)]
#[derive(Clone, Debug, Default, PartialEq)]
struct SimdTimestamp {
    /// Year divided by 100 (first two year digits).
    year_hi: u16,
    /// Year modulo 100 (last two year digits).
    year_lo: u16,
    /// Two-digit month field.
    month: u16,
    /// Two-digit day field.
    day: u16,
    /// Two-digit hour field.
    hour: u16,
    /// Two-digit minute field.
    minute: u16,
    /// Padding lane; keeps the struct at 16 bytes.
    pad1: u16,
    /// Padding lane; keeps the struct at 16 bytes.
    pad2: u16,
}
// Compile-time guard: `SimdTimestamp` must be exactly 16 bytes because the SIMD
// parser writes it with one 128-bit store.
const _: () = assert!(std::mem::size_of::<SimdTimestamp>() == 16);
impl SimdTimestamp {
    /// Builds a record from already-split date/time fields.
    ///
    /// The year is stored as two parts: `year / 100` and `year % 100`.
    /// Both padding fields are zero.
    fn new(year: u16, month: u16, day: u16, hour: u16, minute: u16) -> Self {
        let (year_hi, year_lo) = (year / 100, year % 100);
        Self {
            year_hi,
            year_lo,
            month,
            day,
            hour,
            minute,
            ..Self::default()
        }
    }
}
/// Broken-down timestamp produced by the parsers in this module.
///
/// Field values are stored as parsed; this type performs no range validation.
#[derive(Clone, Debug, Default, PartialEq)]
pub(crate) struct Timestamp {
    /// Four-digit year.
    year: u16,
    /// Month field as parsed.
    month: u8,
    /// Day field as parsed.
    day: u8,
    /// Hour field as parsed.
    hour: u8,
    /// Minute field as parsed.
    minute: u8,
    /// Second field as parsed (0 when the input has no seconds).
    second: u8,
    /// Fractional second in milliseconds.
    millisecond: u32,
    /// Signed UTC offset in minutes (0 for `Z`).
    offset_minute: i32,
}
impl Timestamp {
    /// Creates a timestamp with a UTC offset of zero minutes.
    pub(crate) fn new(
        year: u16,
        month: u8,
        day: u8,
        hour: u8,
        minute: u8,
        second: u8,
        millisecond: u32,
    ) -> Self {
        Self::new_with_offset_minute(year, month, day, hour, minute, second, millisecond, 0)
    }

    /// Creates a timestamp with an explicit UTC offset given in minutes.
    pub(crate) fn new_with_offset_minute(
        year: u16,
        month: u8,
        day: u8,
        hour: u8,
        minute: u8,
        second: u8,
        millisecond: u32,
        offset_minute: i32,
    ) -> Self {
        Self {
            year,
            month,
            day,
            hour,
            minute,
            second,
            millisecond,
            offset_minute,
        }
    }

    /// Converts the fields into a [`PackedTimestamp`], widening each one losslessly.
    pub(crate) fn to_packed(&self) -> PackedTimestamp {
        let year = i32::from(self.year);
        let month = u32::from(self.month);
        let day = u32::from(self.day);
        let hour = u32::from(self.hour);
        let minute = u32::from(self.minute);
        let second = u32::from(self.second);
        PackedTimestamp::new(
            year,
            month,
            day,
            hour,
            minute,
            second,
            self.millisecond,
            self.offset_minute,
        )
    }
}
/// Converts a parsed timestamp into a millisecond count.
///
/// The day number comes from `EpochDays::from_ymd(..).days()`; the UTC offset is
/// subtracted so the result is normalised to offset zero.
#[inline(always)]
fn ts_to_epoch_millis(ts: &Timestamp) -> i64 {
    const SECONDS_PER_MINUTE: i64 = 60;
    const SECONDS_PER_HOUR: i64 = 60 * SECONDS_PER_MINUTE;
    const SECONDS_PER_DAY: i64 = 24 * SECONDS_PER_HOUR;

    let days = EpochDays::from_ymd(ts.year as i32, ts.month as i32, ts.day as i32).days() as i64;
    let second_of_day = i64::from(ts.hour) * SECONDS_PER_HOUR
        + i64::from(ts.minute) * SECONDS_PER_MINUTE
        + i64::from(ts.second);
    let offset_seconds = i64::from(ts.offset_minute) * SECONDS_PER_MINUTE;

    let seconds = days * SECONDS_PER_DAY + second_of_day - offset_seconds;
    seconds * 1000 + i64::from(ts.millisecond)
}
/// Parses `input` with the scalar parser and returns the resulting millisecond count.
///
/// # Errors
/// Propagates any [`ParseError`] reported by `parse_scalar`.
#[doc(hidden)]
pub fn parse_to_epoch_millis_scalar(input: &str) -> ParseResult<i64> {
    parse_scalar(input.as_bytes()).map(|ts| ts_to_epoch_millis(&ts))
}
/// Parses `input` with the scalar parser and packs the fields into a [`PackedTimestamp`].
///
/// # Errors
/// Propagates any [`ParseError`] reported by `parse_scalar`.
#[doc(hidden)]
pub fn parse_to_packed_timestamp_scalar(input: &str) -> ParseResult<PackedTimestamp> {
    parse_scalar(input.as_bytes()).map(|ts| ts.to_packed())
}
pub(crate) fn parse_scalar(bytes: &[u8]) -> ParseResult<Timestamp> {
if bytes.len() < 16 {
return Err(ParseError::InvalidLen(bytes.len()));
}
let mut timestamp = Timestamp::default();
let mut index = 0;
let year = parse_num4(bytes, &mut index)?;
expect(bytes, &mut index, b'-')?;
let month = parse_num2(bytes, &mut index)?;
expect(bytes, &mut index, b'-')?;
let day = parse_num2(bytes, &mut index)?;
expect2(bytes, &mut index, b'T', b' ')?;
let hour = parse_num2(bytes, &mut index)?;
expect(bytes, &mut index, b':')?;
let minute = parse_num2(bytes, &mut index)?;
let (second, nano) = parse_seconds_and_nanos(bytes, &mut index)?;
let offset = parse_utc_or_offset_minutes(bytes, &mut index)?;
timestamp.year = year as u16;
timestamp.month = month as u8;
timestamp.day = day as u8;
timestamp.hour = hour as u8;
timestamp.minute = minute as u8;
timestamp.second = second as u8;
timestamp.millisecond = nano / 1_000_000;
timestamp.offset_minute = offset;
Ok(timestamp)
}
/// Parses the optional `[:ss][.fraction]` part starting at `*index`.
///
/// Returns `(seconds, nanoseconds)`. Either component is 0 when absent. A
/// fraction directly after the minutes (without seconds) is accepted as well.
/// `*index` is left untouched when the next byte is neither `.` nor `:`.
#[inline(always)]
fn parse_seconds_and_nanos(bytes: &[u8], index: &mut usize) -> ParseResult<(u32, u32)> {
    match bytes.get(*index) {
        Some(b'.') => {
            *index += 1;
            let nano = parse_nano(bytes, index)?;
            Ok((0, nano))
        }
        Some(b':') => {
            *index += 1;
            let second = parse_num2(bytes, index)?;
            let nano = if bytes.get(*index) == Some(&b'.') {
                *index += 1;
                parse_nano(bytes, index)?
            } else {
                0
            };
            Ok((second, nano))
        }
        _ => Ok((0, 0)),
    }
}
/// Out-of-line fallback that parses seconds, fraction and zone designator one
/// byte at a time, returning `(seconds, nanoseconds, offset_minutes)`.
#[inline(never)]
fn parse_seconds_and_nanos_and_offset_minutes_slow_path(
    bytes: &[u8],
    index: &mut usize,
) -> ParseResult<(u32, u32, i32)> {
    let (second, nano) = parse_seconds_and_nanos(bytes, index)?;
    parse_utc_or_offset_minutes(bytes, index).map(|offset| (second, nano, offset))
}
/// Parses the mandatory zone designator at `*index`: `Z`, `+hh:mm` or `-hh:mm`.
///
/// Returns the signed offset in minutes (0 for `Z`). The designator must be the
/// last thing in `bytes`; this holds for numeric offsets as well as for `Z`, so
/// that `...+01:00garbage` is rejected just like `...Zgarbage`.
///
/// # Errors
/// * [`ParseError::InvalidLen`] if the input ends before the designator.
/// * [`ParseError::InvalidChar`] if the byte at `*index` is not `Z`, `+` or `-`.
/// * [`ParseError::TrailingChar`] if any byte follows the designator.
/// * Any error from parsing the `hh:mm` offset digits.
#[inline(always)]
fn parse_utc_or_offset_minutes(bytes: &[u8], index: &mut usize) -> ParseResult<i32> {
    let first = match bytes.get(*index) {
        Some(&byte) => byte,
        None => return Err(ParseError::InvalidLen(*index)),
    };

    let offset = match first {
        b'Z' => {
            *index += 1;
            0
        }
        b'+' => {
            *index += 1;
            parse_offset_minutes(bytes, index)? as i32
        }
        b'-' => {
            *index += 1;
            -(parse_offset_minutes(bytes, index)? as i32)
        }
        _ => return Err(ParseError::InvalidChar(*index)),
    };

    // Nothing may follow the zone designator, whichever form it took.
    if *index != bytes.len() {
        Err(ParseError::TrailingChar(*index))
    } else {
        Ok(offset)
    }
}
/// Parses an unsigned `hh:mm` offset at `*index` and returns it in minutes.
#[inline(always)]
fn parse_offset_minutes(bytes: &[u8], index: &mut usize) -> ParseResult<u32> {
    let hours = parse_num2(bytes, index)?;
    expect(bytes, index, b':')?;
    parse_num2(bytes, index).map(|minutes| hours * 60 + minutes)
}
/// Parses exactly two ASCII digits at `*i` into a number in `0..=99`.
#[inline(always)]
fn parse_num2(bytes: &[u8], i: &mut usize) -> ParseResult<u32> {
    let tens = digit(bytes, i)?;
    digit(bytes, i).map(|ones| tens * 10 + ones)
}
/// Parses exactly four ASCII digits at `*i` into a number in `0..=9999`.
#[inline(always)]
fn parse_num4(bytes: &[u8], i: &mut usize) -> ParseResult<u32> {
    let mut value = 0;
    // Accumulate most-significant digit first; stops at the first bad byte.
    for _ in 0..4 {
        value = value * 10 + digit(bytes, i)?;
    }
    Ok(value)
}
/// Powers of ten `10^0 ..= 10^8`, used to scale a fraction with fewer than nine
/// digits up to nanoseconds.
const NANO_MULTIPLIER: [u32; 9] = {
    let mut table = [1u32; 9];
    let mut k = 1;
    while k < table.len() {
        table[k] = table[k - 1] * 10;
        k += 1;
    }
    table
};
/// Parses one to nine fraction digits at `*i` and returns them as nanoseconds.
///
/// At least one digit is required. Parsing stops at the first non-digit or after
/// nine digits; a shorter fraction is scaled up (e.g. `123` becomes 123_000_000).
#[inline(always)]
fn parse_nano(bytes: &[u8], i: &mut usize) -> ParseResult<u32> {
    let mut value = digit(bytes, i)?;
    let mut count = 1;
    while count < 9 {
        match bytes.get(*i) {
            Some(ch) if ch.is_ascii_digit() => {
                value = value * 10 + u32::from(ch - b'0');
                count += 1;
                *i += 1;
            }
            _ => break,
        }
    }
    // `count` is in 1..=9, so the table index is in 0..=8.
    Ok(value * NANO_MULTIPLIER[9 - count])
}
/// Consumes the byte at `*i` if it equals `expected`.
///
/// # Errors
/// [`ParseError::InvalidLen`] at end of input, [`ParseError::InvalidChar`] on a
/// mismatch; `*i` is not advanced in either case.
#[inline(always)]
fn expect(bytes: &[u8], i: &mut usize, expected: u8) -> ParseResult<()> {
    match bytes.get(*i) {
        None => Err(ParseError::InvalidLen(*i)),
        Some(&ch) if ch == expected => {
            *i += 1;
            Ok(())
        }
        Some(_) => Err(ParseError::InvalidChar(*i)),
    }
}
/// Consumes the byte at `*i` if it equals either `expected1` or `expected2`,
/// returning the byte that matched.
///
/// # Errors
/// [`ParseError::InvalidLen`] at end of input, [`ParseError::InvalidChar`] on a
/// mismatch; `*i` is not advanced in either case.
#[inline(always)]
fn expect2(bytes: &[u8], i: &mut usize, expected1: u8, expected2: u8) -> ParseResult<u8> {
    match bytes.get(*i).copied() {
        None => Err(ParseError::InvalidLen(*i)),
        Some(ch) if ch == expected1 || ch == expected2 => {
            *i += 1;
            Ok(ch)
        }
        Some(_) => Err(ParseError::InvalidChar(*i)),
    }
}
/// Consumes one ASCII digit at `*i` and returns its numeric value.
///
/// # Errors
/// [`ParseError::InvalidLen`] at end of input, [`ParseError::InvalidChar`] if the
/// byte is not `0`..=`9`; `*i` is not advanced in either case.
#[inline(always)]
fn digit(bytes: &[u8], i: &mut usize) -> ParseResult<u32> {
    match bytes.get(*i) {
        None => Err(ParseError::InvalidLen(*i)),
        Some(ch) if ch.is_ascii_digit() => {
            *i += 1;
            Ok(u32::from(ch - b'0'))
        }
        Some(_) => Err(ParseError::InvalidChar(*i)),
    }
}
/// Parses `input` with the SIMD parser and returns the resulting millisecond count.
///
/// Only compiled for x86_64 builds with SSE2 and SSSE3 enabled.
///
/// # Errors
/// Propagates any [`ParseError`] reported by `parse_simd`.
#[doc(hidden)]
#[inline]
#[cfg(all(
    target_arch = "x86_64",
    target_feature = "sse2",
    target_feature = "ssse3"
))]
pub fn parse_to_epoch_millis_simd(input: &str) -> ParseResult<i64> {
    parse_simd(input.as_bytes()).map(|ts| ts_to_epoch_millis(&ts))
}
/// Parses `input` with the SIMD parser and packs the fields into a [`PackedTimestamp`].
///
/// Only compiled for x86_64 builds with SSE2 and SSSE3 enabled.
///
/// # Errors
/// Propagates any [`ParseError`] reported by `parse_simd`.
#[doc(hidden)]
#[inline]
#[cfg(all(
    target_arch = "x86_64",
    target_feature = "sse2",
    target_feature = "ssse3"
))]
pub fn parse_to_packed_timestamp_simd(input: &str) -> ParseResult<PackedTimestamp> {
    parse_simd(input.as_bytes()).map(|ts| ts.to_packed())
}
/// Validates and decodes the fixed-width `YYYY-MM-DD(T| )hh:mm` prefix with
/// SSE2/SSSE3 instructions.
///
/// Validation is a per-byte range check: a byte is accepted when it lies strictly
/// between the corresponding bytes of `MIN_BYTES` and `MAX_BYTES`, or when it
/// equals the corresponding byte of `SPACE_SEP_BYTES` (which supplies `0` for
/// digit positions and the literal separators `-`, ` ` and `:`).
///
/// # Safety
/// `bytes` must be valid for reads of at least 16 bytes.
///
/// # Errors
/// Returns [`ParseError::InvalidChar`] with the index of the first offending byte.
#[inline]
#[cfg(all(
    target_arch = "x86_64",
    target_feature = "sse2",
    target_feature = "ssse3"
))]
unsafe fn parse_simd_yyyy_mm_dd_hh_mm(bytes: *const u8) -> ParseResult<SimdTimestamp> {
    use std::arch::x86_64::*;

    // Exclusive bounds. Digit positions use '/' (just below '0') and ':' (just
    // above '9') so that no punctuation byte can slip through as a "digit".
    // Tens positions carry tighter upper bounds: month < '2', day < '4',
    // hour < '3', minute < '6'. Only the first 16 bytes of each table are loaded.
    const MIN_BYTES: &[u8] = "////-/0-/0S//9//9//".as_bytes();
    const MAX_BYTES: &[u8] = "::::-2:-4:U3:;6:;6:".as_bytes();
    const SPACE_SEP_BYTES: &[u8] = "0000-00-00 00:00:00".as_bytes();

    let mut timestamp = SimdTimestamp::default();

    let input = _mm_loadu_si128(bytes as *const __m128i);
    let min = _mm_loadu_si128(MIN_BYTES.as_ptr() as *const __m128i);
    let max = _mm_loadu_si128(MAX_BYTES.as_ptr() as *const __m128i);
    let space = _mm_loadu_si128(SPACE_SEP_BYTES.as_ptr() as *const __m128i);

    let above_min = _mm_cmpgt_epi8(input, min);
    let below_max = _mm_cmplt_epi8(input, max);
    let exact = _mm_cmpeq_epi8(input, space);
    let valid = _mm_or_si128(_mm_and_si128(above_min, below_max), exact);
    let valid_mask = _mm_movemask_epi8(valid);
    if valid_mask != 0xFFFF {
        // The lowest cleared bit is the index of the first invalid byte.
        return Err(ParseError::InvalidChar(
            (!valid_mask).trailing_zeros() as usize
        ));
    }

    // Turn ASCII digits into their numeric values, then gather the twelve digit
    // bytes into six adjacent (tens, ones) pairs; the top four bytes become zero.
    let nums = _mm_sub_epi8(input, space);
    let nums = _mm_shuffle_epi8(
        nums,
        _mm_set_epi8(-1, -1, -1, -1, 15, 14, 12, 11, 9, 8, 6, 5, 3, 2, 1, 0),
    );

    // Within each 16-bit lane the low byte is the tens digit and the high byte
    // is the ones digit: value = tens * 10 + ones.
    let tens = _mm_and_si128(nums, _mm_set1_epi16(0x00FF));
    let tens = _mm_mullo_epi16(tens, _mm_set1_epi16(10));
    let ones = _mm_srli_epi16::<8>(nums);
    let res = _mm_add_epi16(ones, tens);

    // `SimdTimestamp` is `repr(C)` and exactly 16 bytes, matching the eight lanes.
    let timestamp_ptr: *mut SimdTimestamp = &mut timestamp;
    _mm_storeu_si128(timestamp_ptr as *mut __m128i, res);
    Ok(timestamp)
}
/// SIMD-assisted parser for `YYYY-MM-DD(T| )hh:mm[:ss][.fraction](Z|+hh:mm|-hh:mm)`.
///
/// The 16-byte date/time prefix is decoded with SSE2/SSSE3. The remainder uses a
/// fixed-width fast path when it has the exact shape `:ss.mmm` followed by a zone
/// designator, and the byte-by-byte slow path for every other shape (no seconds,
/// no fraction, more or fewer than three fraction digits, ...).
///
/// # Errors
/// * [`ParseError::InvalidLen`] if `bytes` is shorter than 16 bytes.
/// * Any [`ParseError`] reported while decoding the prefix or the remainder.
#[inline]
#[cfg(all(
    target_arch = "x86_64",
    target_feature = "sse2",
    target_feature = "ssse3"
))]
pub(crate) fn parse_simd(bytes: &[u8]) -> ParseResult<Timestamp> {
    if bytes.len() < 16 {
        return Err(ParseError::InvalidLen(bytes.len()));
    }
    // SAFETY: the length check above guarantees at least 16 readable bytes.
    let timestamp = unsafe { parse_simd_yyyy_mm_dd_hh_mm(bytes.as_ptr())? };

    // Only trust the fast path when byte 23 really is a zone designator. Anything
    // else (e.g. `:00+01:00` or `:15.123456Z`) is a different, possibly valid,
    // layout and must be handled by the slow path instead of being rejected.
    let fast_path = try_parse_seconds_and_millis_simd(bytes)
        .filter(|&(_, _, marker)| matches!(marker, b'Z' | b'+' | b'-'));

    let (second, milli, offset_minutes) = match fast_path {
        Some((second, milli, _)) => {
            // Reuse the scalar zone parser so both code paths share the same
            // sign handling and trailing-input rules.
            let mut index = 23;
            let offset = parse_utc_or_offset_minutes(bytes, &mut index)?;
            (second, milli, offset)
        }
        None => {
            let mut index = 16;
            let (second, nano, offset) =
                parse_seconds_and_nanos_and_offset_minutes_slow_path(bytes, &mut index)?;
            (second, nano / 1_000_000, offset)
        }
    };

    Ok(Timestamp {
        year: timestamp.year_hi * 100 + timestamp.year_lo,
        month: timestamp.month as u8,
        day: timestamp.day as u8,
        hour: timestamp.hour as u8,
        minute: timestamp.minute as u8,
        second: second as u8,
        millisecond: milli,
        offset_minute: offset_minutes,
    })
}
/// Fixed-width fast path for the bytes at offsets 16..24 of a timestamp.
///
/// Succeeds only when those eight bytes have exactly the shape `:ss.mmm`
/// followed by a zone designator byte (`Z`, `+` or `-`). On success returns
/// `(seconds, milliseconds, designator)`.
///
/// Returns `None` when the input is shorter than 24 bytes or has any other
/// shape, so the caller can fall back to the general parser. Every byte is
/// validated individually; comparing the eight bytes as a single integer is not
/// sufficient because the most significant byte alone would decide the result.
#[inline(always)]
fn try_parse_seconds_and_millis_simd(input: &[u8]) -> Option<(u32, u32, u8)> {
    match input.get(16..24)? {
        &[b':', s1, s2, b'.', f1, f2, f3, marker]
            if matches!(marker, b'Z' | b'+' | b'-')
                && [s1, s2, f1, f2, f3].iter().all(u8::is_ascii_digit) =>
        {
            // All five bytes are known to be ASCII digits, so this cannot underflow.
            let value = |byte: u8| u32::from(byte - b'0');
            let second = value(s1) * 10 + value(s2);
            let milli = value(f1) * 100 + value(f2) * 10 + value(f3);
            Some((second, milli, marker))
        }
        _ => None,
    }
}
/// Parses a timestamp from `bytes` and returns it as a millisecond count.
///
/// Uses the SIMD parser on builds where it is compiled in (x86_64 with SSE2 and
/// SSSE3 enabled) and the scalar parser everywhere else. The condition here is
/// deliberately identical to the one guarding `parse_simd`, so this function
/// never refers to a parser that does not exist for the current target.
///
/// # Errors
/// Propagates any [`ParseError`] reported by the selected parser.
pub fn parse_to_timestamp_millis(bytes: &[u8]) -> ParseResult<i64> {
    #[cfg(all(
        target_arch = "x86_64",
        target_feature = "sse2",
        target_feature = "ssse3"
    ))]
    let ts = parse_simd(bytes)?;
    #[cfg(not(all(
        target_arch = "x86_64",
        target_feature = "sse2",
        target_feature = "ssse3"
    )))]
    let ts = parse_scalar(bytes)?;
    Ok(ts_to_epoch_millis(&ts))
}
// Tests for the SIMD parser; only built where `parse_simd` itself is compiled
// (x86_64 with SSE2 and SSSE3 enabled).
#[cfg(test)]
#[cfg(all(
target_arch = "x86_64",
target_feature = "sse2",
target_feature = "ssse3"
))]
pub mod simd_tests {
use crate::error::ParseError;
use crate::parse::{parse_simd, Timestamp};
use crate::parse_to_epoch_millis_simd;
// Accepted layouts: with and without seconds, with and without milliseconds,
// and with either `T` or a space between date and time.
#[test]
fn test_valid() {
assert!(parse_simd(b"1970-01-01T00:00Z").is_ok());
assert!(parse_simd(b"1970-01-01T00:00:00Z").is_ok());
assert!(parse_simd(b"1970-01-01T00:00:00.000Z").is_ok());
assert!(parse_simd(b"1970-01-01 00:00Z").is_ok());
assert!(parse_simd(b"1970-01-01 00:00:00Z").is_ok());
assert!(parse_simd(b"1970-01-01 00:00:00.000Z").is_ok());
}
// Inputs shorter than 16 bytes are rejected with their own length.
#[test]
fn test_invalid_len() {
assert_eq!(Err(ParseError::InvalidLen(0)), parse_simd(b""));
assert_eq!(Err(ParseError::InvalidLen(1)), parse_simd(b"X"));
assert_eq!(Err(ParseError::InvalidLen(4)), parse_simd(b"2020"));
}
// The reported index is that of the first invalid byte in the 16-byte prefix.
#[test]
fn test_invalid_char() {
assert_eq!(
Err(ParseError::InvalidChar(0)),
parse_simd(b"X020-09-10T12:00:00Z")
);
assert_eq!(
Err(ParseError::InvalidChar(1)),
parse_simd(b"2X20-09-10T12:00:00Z")
);
assert_eq!(
Err(ParseError::InvalidChar(2)),
parse_simd(b"20X0-09-10T12:00:00Z")
);
assert_eq!(
Err(ParseError::InvalidChar(10)),
parse_simd(b"2020-09-10X12:00:00Z")
);
// Two bad bytes (index 10 and index 16): the earlier one is reported.
assert_eq!(
Err(ParseError::InvalidChar(10)),
parse_simd(b"2020-09-10X12:00/")
);
assert_eq!(
Err(ParseError::InvalidChar(15)),
parse_simd(b"2020-09-10T12:0X/")
);
}
// Full `:ss.mmmZ` layout (24 bytes).
#[test]
fn test_parse_simd() {
assert_eq!(
Timestamp::new(2345, 12, 24, 17, 30, 15, 123),
parse_simd(b"2345-12-24T17:30:15.123Z").unwrap()
);
}
// Positive numeric offset after the milliseconds.
#[test]
fn test_parse_with_offset_simd() {
assert_eq!(
Timestamp::new_with_offset_minute(2020, 9, 19, 11, 40, 20, 123, 2 * 60),
parse_simd(b"2020-09-19T11:40:20.123+02:00").unwrap()
);
}
// Cross-check the millisecond result against chrono's RFC 3339 parser.
#[test]
fn test_parse_millis_simd() {
let input = "2020-09-18T23:30:15Z";
let expected = chrono::DateTime::parse_from_rfc3339(input)
.unwrap()
.timestamp_millis();
let actual = parse_to_epoch_millis_simd(input).unwrap();
assert_eq!(expected, actual);
}
// The input is a 20-byte slice of a longer string; presumably this checks that
// bytes lying beyond the slice do not influence the result.
#[test]
fn test_parse_millis_simd_masked() {
let input = "2020-09-18T23:30:15Z--::ZZ";
let input = unsafe { input.get_unchecked(0..20) };
let expected = chrono::DateTime::parse_from_rfc3339(input)
.unwrap()
.timestamp_millis();
let actual = parse_to_epoch_millis_simd(input).unwrap();
assert_eq!(expected, actual);
}
}
// Tests for the scalar (byte-by-byte) parser; built on every target.
#[cfg(test)]
mod scalar_tests {
use crate::{parse_scalar, parse_to_epoch_millis_scalar, Timestamp};
// Full `:ss.mmmZ` layout decodes into the expected fields.
#[test]
fn test_parse_scalar() {
assert_eq!(
Timestamp::new(2345, 12, 24, 17, 30, 15, 123),
parse_scalar(b"2345-12-24T17:30:15.123Z").unwrap()
);
}
// Cross-check the millisecond result against chrono's RFC 3339 parser.
#[test]
fn test_parse_millis_scalar() {
let input = "2020-09-18T23:30:15Z";
let expected = chrono::DateTime::parse_from_rfc3339(input)
.unwrap()
.timestamp_millis();
let actual = parse_to_epoch_millis_scalar(input).unwrap();
assert_eq!(expected, actual);
}
}