use chrono::prelude::*;
use regex::Regex;
use serde::Serialize;
use std::cmp::PartialEq;
use std::fmt;
use std::mem;
/// A value whose concrete type was inferred from its textual form.
///
/// `detect_type` in this file produces one of these variants for a raw string.
///
/// Serialization uses serde's `untagged` representation, so only the payload
/// is written and the variant name is not.
// NOTE(review): with `untagged` no variant name is emitted, so `rename_all`
// presumably has no visible effect here — confirm before relying on it.
#[derive(Serialize)]
#[serde(untagged, rename_all = "camelCase")]
pub enum DataType {
/// Arbitrary text; the fallback used by `detect_type` when nothing else matches.
String(String),
/// A signed 64-bit integer.
Int(i64),
/// A 64-bit floating point number.
Float(f64),
/// A boolean.
Bool(bool),
/// A point in time expressed in UTC.
DateTime(chrono::DateTime<Utc>),
}
impl fmt::Debug for DataType {
    /// Writes the value as `Variant(payload)`, where the payload is rendered
    /// with its `Display` implementation (so strings appear without quotes).
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Pick the label and a type-erased view of the payload, then share a
        // single formatting call across all variants.
        let (label, payload) = match self {
            DataType::String(text) => ("String", text as &dyn fmt::Display),
            DataType::Int(number) => ("Int", number as &dyn fmt::Display),
            DataType::Float(number) => ("Float", number as &dyn fmt::Display),
            DataType::Bool(flag) => ("Bool", flag as &dyn fmt::Display),
            DataType::DateTime(moment) => ("DateTime", moment as &dyn fmt::Display),
        };
        write!(f, "{}({})", label, payload)
    }
}
impl PartialEq for DataType {
    /// Two values are equal when they are the same variant and their payloads
    /// compare equal with `==`; values of different variants are never equal.
    fn eq(&self, other: &Self) -> bool {
        // Cheap variant comparison first; payloads are only inspected when
        // both sides carry the same variant.
        let same_variant = mem::discriminant(self) == mem::discriminant(other);
        same_variant
            && match (self, other) {
                (DataType::String(lhs), DataType::String(rhs)) => lhs == rhs,
                (DataType::Int(lhs), DataType::Int(rhs)) => lhs == rhs,
                (DataType::Float(lhs), DataType::Float(rhs)) => lhs == rhs,
                (DataType::Bool(lhs), DataType::Bool(rhs)) => lhs == rhs,
                (DataType::DateTime(lhs), DataType::DateTime(rhs)) => lhs == rhs,
                _ => false,
            }
    }
}
/// Infers the most specific [`DataType`] for a raw text value.
///
/// Candidates are tried in this order and the first match wins:
///
/// 1. `Bool` — text accepted by `try_get_bool`.
/// 2. For text that parses as a number:
///    - `DateTime` when the text is exactly eight ASCII digits that
///      `try_get_datetime` accepts as a `YYYYMMDD` date;
///    - `Int` when the text parses as an `i64`, or is a float without a
///      fractional part that fits in `i64` (e.g. `"465.0"`);
///    - `Float` otherwise. This includes integral values outside the `i64`
///      range such as `"1e30"`, which are kept as floats instead of being
///      clamped to `i64::MAX` / `i64::MIN`.
/// 3. `DateTime` — any other text accepted by `try_get_datetime`.
/// 4. `String` — an owned copy of the input.
pub fn detect_type(s: &str) -> DataType {
    // 2^63: the smallest integral f64 above `i64::MAX`. Its negation is
    // exactly `i64::MIN`, hence the half-open range check below.
    const I64_LIMIT: f64 = 9_223_372_036_854_775_808.0;

    if let Some(flag) = try_get_bool(s) {
        return DataType::Bool(flag);
    }

    if let Some(number) = try_get_f64(s) {
        // Only unsigned, all-digit text may be a compact date; signed values
        // such as "-1234567" are plain integers.
        if s.len() == 8 && s.bytes().all(|b| b.is_ascii_digit()) {
            if let Some(date) = try_get_datetime(s) {
                return DataType::DateTime(date);
            }
        }
        // Parse integers directly so values above 2^53 keep every digit
        // instead of being rounded through f64.
        if let Ok(exact) = s.parse::<i64>() {
            return DataType::Int(exact);
        }
        if number.fract() == 0.0 && number >= -I64_LIMIT && number < I64_LIMIT {
            // In range and integral, so the cast is exact.
            return DataType::Int(number as i64);
        }
        return DataType::Float(number);
    }

    match try_get_datetime(s) {
        Some(date) => DataType::DateTime(date),
        None => DataType::String(s.to_string()),
    }
}
/// Interprets `s` as a boolean, ignoring letter case.
///
/// Returns `Some(true)` for "true", `Some(false)` for "false" (in any casing)
/// and `None` for everything else, including text with surrounding whitespace.
fn try_get_bool(s: &str) -> Option<bool> {
    let lowered = s.to_lowercase();
    if lowered == "true" {
        Some(true)
    } else if lowered == "false" {
        Some(false)
    } else {
        None
    }
}
/// Parses `s` as an `f64`, returning `None` when the text is not a valid float.
fn try_get_f64(s: &str) -> Option<f64> {
    s.parse::<f64>().ok()
}
/// Parses `s` as an `i64`, returning `None` when the text is not a valid
/// integer or does not fit in 64 bits.
fn try_get_i64(s: &str) -> Option<i64> {
    match s.parse::<i64>() {
        Ok(value) => Some(value),
        Err(_) => None,
    }
}
/// Splits an eight-character, all-ASCII-digit string (`YYYYMMDD`) into
/// `(year, month, day)`. Returns `None` for any other input, including signed
/// numbers such as "-1234567".
fn split_compact_date(s: &str) -> Option<(u32, u32, u32)> {
    if s.len() != 8 || !s.bytes().all(|b| b.is_ascii_digit()) {
        return None;
    }
    // The input is pure ASCII, so the fixed byte ranges are valid char
    // boundaries and every slice is a plain unsigned number.
    Some((
        s[0..4].parse().ok()?,
        s[4..6].parse().ok()?,
        s[6..8].parse().ok()?,
    ))
}

/// Tries to read `s` as a UTC date or date-time.
///
/// Two shapes are recognised:
///
/// * Compact `YYYYMMDD` — exactly eight ASCII digits; the time is midnight.
///   Any other text that parses as an integer is rejected.
/// * Separated `YYYY<sep>M<sep>D`, where `<sep>` is `-`, `/`, `.`, `年` (after
///   the year) or `月` (after the month). It may be followed by one or more
///   non-digit characters and a time `H:M[:S[.mmm]]`, where `时` may replace
///   the first `:` and `分` the second. The pattern is anchored at the start
///   only, so trailing text (e.g. `日`, `秒`, `Z`) is ignored.
///
/// The date must be a real calendar date (`is_date`), otherwise `None` is
/// returned. An out-of-range time component does not reject the input: if the
/// hour or minute is invalid the time falls back to midnight, and if the
/// second is invalid it falls back to `H:M:00`.
// NOTE(review): the regex is compiled on every call; consider caching it if
// this shows up in profiles.
fn try_get_datetime(s: &str) -> Option<DateTime<Utc>> {
    let (year, month, day, time) = if try_get_i64(s).is_some() {
        let (year, month, day) = split_compact_date(s)?;
        (year, month, day, (0, 0, 0, 0))
    } else {
        // Verbose mode (`(?x)`): whitespace inside the pattern is ignored.
        let re = Regex::new(
            r"(?x)
            ^(?P<year>\d{4})
            (-|/|\.|年)
            (?P<month>\d{1,2})
            (-|/|\.|月)
            (?P<day>\d{1,2})
            (
                [^0-9]+(?P<hour>\d{1,2})
                [:时](?P<minute>\d{1,2})
                (
                    [:分](?P<second>\d{1,2})
                    ([\.](?P<milli>\d{3}))?
                )?
            )?
            ",
        )
        .ok()?;
        let caps = re.captures(s)?;
        // Numeric value of a named group, if it took part in the match.
        let field = |name: &str| caps.name(name).and_then(|m| m.as_str().parse::<u32>().ok());

        let (year, month, day) = (field("year")?, field("month")?, field("day")?);

        let mut time: (u32, u32, u32, u32) = (0, 0, 0, 0);
        if let (Some(hour), Some(minute)) = (field("hour"), field("minute")) {
            if hour < 24 && minute < 60 {
                time = (hour, minute, 0, 0);
                if let Some(second) = field("second").filter(|&sec| sec < 60) {
                    time = (hour, minute, second, field("milli").unwrap_or(0));
                }
            }
        }
        (year, month, day, time)
    };

    if !is_date(year, month, day) {
        return None;
    }

    let (hour, minute, second, milli) = time;
    // Zero-pad every field; in particular the milliseconds must keep their
    // leading zeros, otherwise ".013" would be re-read as ".13" (130 ms).
    let normalized = format!(
        "{:04}-{:02}-{:02} {:02}:{:02}:{:02}.{:03}",
        year, month, day, hour, minute, second, milli
    );
    Utc.datetime_from_str(normalized.as_str(), "%Y-%m-%d %H:%M:%S%.3f")
        .ok()
}
fn is_date(year: u32, month: u32, day: u32) -> bool {
if month < 1 || month > 12 {
false
} else {
match month {
1 | 3 | 5 | 7 | 8 | 10 | 12 if day > 0 && day < 32 => true,
4 | 6 | 9 | 11 if day > 0 && day < 31 => true,
2 if is_leap_year(year) && day > 0 && day < 30 => true,
2 if !is_leap_year(year) && day > 0 && day < 29 => true,
_ => false,
}
}
}
/// Reports whether `year` is a leap year: divisible by 4, except for century
/// years, which must also be divisible by 400.
fn is_leap_year(year: u32) -> bool {
    let divisible_by = |n: u32| year % n == 0;
    divisible_by(4) && (!divisible_by(100) || divisible_by(400))
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `detect_type` maps every input to its paired expectation.
    fn assert_detected(cases: Vec<(&str, DataType)>) {
        for (input, expected) in cases {
            assert_eq!(detect_type(input), expected);
        }
    }

    /// Shorthand for an expected UTC date-time with whole seconds.
    fn at(year: i32, month: u32, day: u32, hour: u32, minute: u32, second: u32) -> DataType {
        DataType::DateTime(Utc.ymd(year, month, day).and_hms(hour, minute, second))
    }

    #[test]
    fn bool_works() {
        assert_detected(vec![
            ("true", DataType::Bool(true)),
            ("True", DataType::Bool(true)),
            ("TRUE", DataType::Bool(true)),
            ("false", DataType::Bool(false)),
            ("False", DataType::Bool(false)),
            ("FALSE", DataType::Bool(false)),
        ]);
    }

    #[test]
    fn int_works() {
        assert_detected(vec![
            ("123", DataType::Int(123)),
            ("0123", DataType::Int(123)),
            ("465.0", DataType::Int(465)),
            ("-34.0", DataType::Int(-34)),
            ("-27", DataType::Int(-27)),
            ("000", DataType::Int(0)),
            ("0", DataType::Int(0)),
            ("0.0", DataType::Int(0)),
        ]);
    }

    #[test]
    fn float_works() {
        assert_detected(vec![
            ("123.1", DataType::Float(123.1)),
            ("0123.2", DataType::Float(123.2)),
            ("465.389", DataType::Float(465.389)),
            ("-34.2", DataType::Float(-34.2)),
            ("-27.99", DataType::Float(-27.99)),
            ("000.1", DataType::Float(0.1)),
            ("0.00001", DataType::Float(0.00001)),
            ("-.2", DataType::Float(-0.2)),
            (".324", DataType::Float(0.324)),
        ]);
    }

    #[test]
    fn datetime_works() {
        assert_detected(vec![
            // Date only: compact, dashed, slashed, dotted and Chinese forms.
            ("20220405", at(2022, 4, 5, 0, 0, 0)),
            ("20221213", at(2022, 12, 13, 0, 0, 0)),
            ("2022-03-04", at(2022, 3, 4, 0, 0, 0)),
            ("2022-12-24", at(2022, 12, 24, 0, 0, 0)),
            ("2022-1-13", at(2022, 1, 13, 0, 0, 0)),
            ("2022-3-6", at(2022, 3, 6, 0, 0, 0)),
            ("2022/03/06", at(2022, 3, 6, 0, 0, 0)),
            ("2022/3/6", at(2022, 3, 6, 0, 0, 0)),
            ("2022.03.06", at(2022, 3, 6, 0, 0, 0)),
            ("2022.3.6", at(2022, 3, 6, 0, 0, 0)),
            ("2022年03月06日", at(2022, 3, 6, 0, 0, 0)),
            ("2022年3月6", at(2022, 3, 6, 0, 0, 0)),
            // Date followed by a time of day.
            ("2014-11-28T12:00:09Z", at(2014, 11, 28, 12, 0, 9)),
            ("2022-03-04 13:04:05", at(2022, 3, 4, 13, 4, 5)),
            ("2022-03-04 1:2:3", at(2022, 3, 4, 1, 2, 3)),
            ("2022年03月04日 13:4:5", at(2022, 3, 4, 13, 4, 5)),
            ("2022-03-04 13时04分05秒", at(2022, 3, 4, 13, 4, 5)),
            ("2022年03月04日13:14", at(2022, 3, 4, 13, 14, 0)),
            ("2022-03-04 13:25", at(2022, 3, 4, 13, 25, 0)),
            ("2022-3-4 13:25", at(2022, 3, 4, 13, 25, 0)),
            ("2022-3-4 1:5", at(2022, 3, 4, 1, 5, 0)),
            ("2022-3-4 1:5:3", at(2022, 3, 4, 1, 5, 3)),
            (
                "2022年03月04日13:14:15.123Z",
                DataType::DateTime(Utc.ymd(2022, 3, 4).and_hms_milli(13, 14, 15, 123)),
            ),
        ]);
    }

    #[test]
    fn string_works() {
        // Every input here must come back unchanged as a `String`.
        let inputs = vec![
            "fdsaf",
            "0.3213-",
            "2014-1111",
            "2014-13-11",
            "2014-12-32",
            "2014-12-32 24:00:00",
            "2014-12-32 24:00",
            "2014-12-32 23:60:00",
            "2014-12-32 23:10:60",
            "2014-12-32 23:60",
        ];
        for input in inputs {
            assert_eq!(detect_type(input), DataType::String(input.to_string()));
        }
    }
}