use chrono::{DateTime, Local, NaiveDate, NaiveTime, TimeZone, Utc};
use memchr::memchr2;
use crate::TIMESTAMP_FORMAT;
#[inline(always)]
pub(crate) const fn decode_id3(v: u32) -> u32 {
(v & 0x7f) | (((v >> 8) & 0x7f) << 7) | (((v >> 16) & 0x7f) << 14) | (((v >> 24) & 0x7f) << 21)
}
#[inline]
pub(crate) fn nonmagic(str: &str) -> usize {
let mut rv = 0;
let mut chars = str.chars().peekable();
while let Some(c) = chars.next() {
match c {
'\\' => {
if chars.peek().is_none() {
rv += 1; } else {
chars.next(); rv += 1;
}
}
'?' | '*' | '.' | '+' | '^' | '$' => {
continue;
}
'[' => {
rv += 1;
while let Some(&ch) = chars.peek() {
chars.next();
if ch == ']' {
break;
}
}
}
'{' => {
while let Some(&ch) = chars.peek() {
chars.next();
if ch == '}' {
break;
}
}
}
_ => {
rv += 1;
}
}
}
if rv == 0 { 1 } else { rv }
}
pub(crate) fn parse_fat_date(fat_date: u16) -> Option<NaiveDate> {
let day = (fat_date & 0x1f) as u32; let month = ((fat_date >> 5) & 0xf) as u32; let year = (fat_date >> 9) as i32 + 1980; NaiveDate::from_ymd_opt(year, month, day)
}
pub(crate) fn parse_fat_time(fat_time: u16) -> Option<NaiveTime> {
let sec = (fat_time & 0x1f) * 2;
let min = (fat_time >> 5) & 0b111111;
let hour = (fat_time >> 11) & 0b11111;
NaiveTime::from_hms_opt(hour as u32, min as u32, sec as u32)
}
fn windows_time_to_datetime(filetime: i64) -> Option<DateTime<Utc>> {
let windows_epoch = Utc.with_ymd_and_hms(1601, 1, 1, 0, 0, 0).single()?;
let secs = filetime / 10_000_000;
let nanos = (filetime % 10_000_000) * 100;
Some(windows_epoch + chrono::Duration::seconds(secs) + chrono::Duration::nanoseconds(nanos))
}
#[inline(always)]
pub(crate) fn unix_local_time_to_string(timestamp: i64) -> String {
Local
.timestamp_opt(timestamp, 0)
.earliest()
.map(|ts| ts.naive_local().format(TIMESTAMP_FORMAT).to_string())
.unwrap_or("invalid timestamp".into())
}
#[inline(always)]
pub(crate) fn unix_utc_time_to_string(timestamp: i64) -> String {
DateTime::from_timestamp(timestamp, 0)
.map(|ts| ts.format(TIMESTAMP_FORMAT).to_string())
.unwrap_or("invalid timestamp".into())
}
#[inline(always)]
pub(crate) fn windows_filetime_to_string(timestamp: i64) -> String {
windows_time_to_datetime(timestamp)
.map(|ts| ts.format(TIMESTAMP_FORMAT).to_string())
.unwrap_or("invalid timestamp".into())
}
#[inline(always)]
pub(crate) fn find_json_boundaries<S: AsRef<[u8]>>(buf: S) -> Option<(usize, usize)> {
let buf = buf.as_ref();
let mut cnt = 0usize;
let mut in_string = false;
let mut prev_char = 0u8;
let i_open = memchr2(b'[', b'{', buf)?;
let (opening, closing) = if buf[i_open] == b'[' {
(b'[', b']')
} else {
(b'{', b'}')
};
for (i, c) in buf[i_open..].iter().enumerate() {
if c == &b'"' && prev_char != b'\\' {
in_string ^= true
}
if c == &opening && !in_string {
cnt += 1
} else if c == &closing && !in_string {
cnt = cnt.saturating_sub(1)
}
if cnt == 0 {
return Some((i_open, i + i_open));
}
prev_char = *c;
}
None
}
pub(crate) fn debug_string_from_vec_u8(data: &[u8]) -> String {
let mut result = String::new();
for &byte in data {
if byte.is_ascii_graphic() || byte == b' ' {
result.push(byte as char);
} else {
result.push_str(&format!("\\x{:02x}", byte));
}
}
result
}
pub(crate) fn debug_string_from_vec_u16(data: &[u16]) -> String {
let mut result = String::new();
for &short in data {
for byte in short.to_be_bytes() {
if byte.is_ascii_graphic() || byte == b' ' {
result.push(byte as char);
} else {
result.push_str(&format!("\\x{:02x}", byte));
}
}
}
result
}
#[derive(Copy, Eq, PartialEq, Clone, Debug)]
pub(crate) struct Utf8Error {
pub(crate) valid_up_to: usize,
pub(crate) error_len: Option<u8>,
}
const UTF8_CHAR_WIDTH: &[u8; 256] = &[
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ];
#[inline]
const fn utf8_char_width(b: u8) -> usize {
UTF8_CHAR_WIDTH[b as usize] as usize
}
#[inline(always)]
pub(crate) fn run_utf8_validation(v: &[u8]) -> Result<bool, Utf8Error> {
let mut index = 0;
let len = v.len();
let mut ascii_only = len > 0;
while index < len {
let old_offset = index;
macro_rules! err {
($error_len: expr) => {
return Err(Utf8Error {
valid_up_to: old_offset,
error_len: $error_len,
})
};
}
macro_rules! next {
() => {{
index += 1;
if index >= len {
err!(None)
}
v[index]
}};
}
let first = v[index];
if first >= 128 {
let w = utf8_char_width(first);
match w {
2 => {
if next!() as i8 >= -64 {
err!(Some(1))
}
}
3 => {
match (first, next!()) {
(0xE0, 0xA0..=0xBF)
| (0xE1..=0xEC, 0x80..=0xBF)
| (0xED, 0x80..=0x9F)
| (0xEE..=0xEF, 0x80..=0xBF) => {}
_ => err!(Some(1)),
}
if next!() as i8 >= -64 {
err!(Some(2))
}
}
4 => {
match (first, next!()) {
(0xF0, 0x90..=0xBF) | (0xF1..=0xF3, 0x80..=0xBF) | (0xF4, 0x80..=0x8F) => {}
_ => err!(Some(1)),
}
if next!() as i8 >= -64 {
err!(Some(2))
}
if next!() as i8 >= -64 {
err!(Some(3))
}
}
_ => err!(Some(1)),
}
index += 1;
ascii_only = false;
} else {
index += 1;
}
}
Ok(ascii_only)
}
#[cfg(test)]
mod tests {
use super::*;
use chrono::Datelike;
#[test]
fn test_valid_dates() {
let cases = [
(0x5490, (2022, 4, 16)),
(0x21, (1980, 1, 1)),
(0xff9f, (2107, 12, 31)),
];
for (fat_date, (year, month, day)) in cases {
let parsed = parse_fat_date(fat_date).unwrap();
assert_eq!(parsed.year(), year);
assert_eq!(parsed.month(), month);
assert_eq!(parsed.day(), day);
}
}
#[test]
fn test_invalid_dates() {
assert_eq!(parse_fat_date(0xffa0), None)
}
#[test]
fn test_valid_times() {
assert_eq!(parse_fat_time(0x0), NaiveTime::from_hms_opt(0, 0, 0));
assert_eq!(parse_fat_time(0xbf7d), NaiveTime::from_hms_opt(23, 59, 58));
assert_eq!(parse_fat_time(18301), NaiveTime::from_hms_opt(8, 59, 58));
}
#[test]
fn test_windows_date() {
assert_eq!(windows_filetime_to_string(0), "1601-01-01 00:00:00");
assert_eq!(
windows_filetime_to_string(132723834270000000),
"2021-08-02 13:10:27"
);
}
#[test]
fn test_decode_id3() {
assert_eq!(decode_id3(0x00000000), 0);
assert_eq!(decode_id3(0x0000007F), 127);
assert_eq!(decode_id3(0x00007F7F), 16_383);
assert_eq!(decode_id3(0x0000017E), 254);
assert_eq!(decode_id3(0x7F7F7F7F), 268_435_455);
}
#[test]
fn test_find_json_boundaries() {
assert_eq!(find_json_boundaries(b"{\"key\": \"value\"}"), Some((0, 15)));
assert_eq!(find_json_boundaries(b"{\"a\": {\"b\": []}}"), Some((0, 15)));
assert_eq!(find_json_boundaries(b"{\"a\": [1, 2, 3]}"), Some((0, 15)));
assert_eq!(find_json_boundaries(b"[1, 2, 3]"), Some((0, 8)));
assert_eq!(
find_json_boundaries(b"[{\"a\": 1}, {\"b\": 2}]"),
Some((0, 19))
);
assert_eq!(
find_json_boundaries(b"{\"a\": {\"b\": {\"c\": []}}}"),
Some((0, 22))
);
assert_eq!(find_json_boundaries(b"[[[1, 2], [3, 4]]]"), Some((0, 17)));
assert_eq!(
find_json_boundaries(b"{\"key\": \"va\\\"lue\"}"),
Some((0, 17))
);
assert_eq!(
find_json_boundaries(b"[{\"a\": \"va\\\"lue\"}]"),
Some((0, 17))
);
assert_eq!(find_json_boundaries(b"no json here"), None);
assert_eq!(find_json_boundaries(b"{\"key\": \"value\""), None);
assert_eq!(find_json_boundaries(b"[1, 2, 3"), None);
assert_eq!(find_json_boundaries(b"{\"a\": 1}{\"b\": 2}"), Some((0, 7)));
assert_eq!(find_json_boundaries(b"[1, 2][3, 4]"), Some((0, 5)));
assert_eq!(find_json_boundaries(b"{}"), Some((0, 1)));
assert_eq!(find_json_boundaries(b"[]"), Some((0, 1)));
assert_eq!(find_json_boundaries(b""), None);
assert_eq!(find_json_boundaries(b" {\"a\": 1} "), Some((3, 10)));
}
}