type-detector 0.1.6

Detects the data type of a string value
Documentation
use chrono::prelude::*;
use regex::Regex;
use serde::Serialize;
use std::cmp::PartialEq;
use std::fmt;
use std::mem;

/// A value together with the type that was inferred from its textual form.
///
/// Instances are produced by `detect_type`. The serde representation is
/// `untagged`, so only the wrapped value is serialized, without a variant name.
#[derive(Serialize)]
#[serde(untagged, rename_all = "camelCase")]
pub enum DataType {
    /// Text that matched none of the other variants; `detect_type` stores the
    /// input string as-is.
    String(String),
    /// A whole number.
    Int(i64),
    /// A number with a fractional part.
    Float(f64),
    /// A boolean.
    Bool(bool),
    /// A point in time, expressed in UTC.
    DateTime(chrono::DateTime<Utc>),
}

impl fmt::Debug for DataType {
    /// Writes the value as `Variant(payload)`.
    ///
    /// The payload is rendered through its `Display` implementation, so a
    /// `String` payload appears without surrounding quotes.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Pick the variant label and a displayable view of the payload, then
        // emit both with a single format call.
        let (label, payload): (&str, &dyn fmt::Display) = match self {
            DataType::String(text) => ("String", text as &dyn fmt::Display),
            DataType::Int(number) => ("Int", number as &dyn fmt::Display),
            DataType::Float(number) => ("Float", number as &dyn fmt::Display),
            DataType::Bool(flag) => ("Bool", flag as &dyn fmt::Display),
            DataType::DateTime(moment) => ("DateTime", moment as &dyn fmt::Display),
        };
        write!(f, "{}({})", label, payload)
    }
}

impl PartialEq for DataType {
    /// Two values are equal when they are the same variant and their payloads
    /// compare equal with `==`; values of different variants are never equal.
    ///
    /// `Float` payloads use `f64` equality, so a NaN value is not equal to
    /// itself.
    fn eq(&self, other: &Self) -> bool {
        // Variant check first; the match then only has to compare payloads.
        mem::discriminant(self) == mem::discriminant(other)
            && match (self, other) {
                (DataType::String(lhs), DataType::String(rhs)) => lhs == rhs,
                (DataType::Int(lhs), DataType::Int(rhs)) => lhs == rhs,
                (DataType::Float(lhs), DataType::Float(rhs)) => lhs == rhs,
                (DataType::Bool(lhs), DataType::Bool(rhs)) => lhs == rhs,
                (DataType::DateTime(lhs), DataType::DateTime(rhs)) => lhs == rhs,
                _ => false,
            }
    }
}

pub fn detect_type(s: &str) -> DataType {
    if let Some(b) = try_get_bool(s) {
        DataType::Bool(b)
    } else if let Some(f) = try_get_f64(s) {
        if f.fract() == 0.0 {
            if s.len() == 8 {
                if let Some(d) = try_get_datetime(s) {
                    DataType::DateTime(d)
                } else {
                    DataType::Int(f as i64)
                }
            } else {
                DataType::Int(f as i64)
            }
        } else {
            DataType::Float(f)
        }
    } else if let Some(d) = try_get_datetime(s.clone()) {
        DataType::DateTime(d)
    } else {
        DataType::String(s.to_string())
    }
}

/// Reads `s` as a boolean.
///
/// Returns `Some(true)` for `true` and `Some(false)` for `false`, compared
/// without regard to ASCII letter case; any other text (including text with
/// surrounding whitespace) yields `None`.
fn try_get_bool(s: &str) -> Option<bool> {
    // Compare in place instead of allocating a lowercased copy of the input.
    if s.eq_ignore_ascii_case("true") {
        Some(true)
    } else if s.eq_ignore_ascii_case("false") {
        Some(false)
    } else {
        None
    }
}

/// Parses `s` as an `f64` using the standard library's `str::parse`.
///
/// Returns `None` when the text is not a valid float literal.
fn try_get_f64(s: &str) -> Option<f64> {
    s.parse::<f64>().ok()
}

/// Parses `s` as an `i64` using the standard library's `str::parse`.
///
/// Returns `None` when the text is not a valid integer literal or does not
/// fit in 64 bits.
fn try_get_i64(s: &str) -> Option<i64> {
    match s.parse::<i64>() {
        Ok(value) => Some(value),
        Err(_) => None,
    }
}

/// Tries to read `s` as a UTC date or date-time.
///
/// Two families of input are recognised:
///
/// * **Text that parses as an integer** – accepted only when it is exactly
///   eight ASCII digits in `yyyyMMdd` form. Signed values such as
///   `"-1234567"` are rejected (they are integers, not dates).
/// * **Any other text** – must start with `yyyy<sep>M<sep>d`, where the
///   separators are `-`, `/`, `.` or `年` / `月`, and month/day have one or two
///   digits. It may be followed by one or more non-digit characters and a time
///   `H<sep>m[<sep>s[.SSS]]`, where the time separators are `:` or `时` / `分`.
///   Whatever follows the recognised part (for example a trailing `日`, `秒`
///   or `Z`) is ignored.
///
/// Returns `None` when the text matches neither form or when the
/// year/month/day combination is not a real calendar date.
///
/// Out-of-range time fields do not reject the input: an hour ≥ 24 or a
/// minute ≥ 60 drops the whole time part (midnight is used), and a
/// second ≥ 60 drops seconds and milliseconds.
/// NOTE(review): this fallback is kept from the previous implementation; it
/// may be more lenient than intended – confirm before relying on it.
fn try_get_datetime(s: &str) -> Option<DateTime<Utc>> {
    // Pure numbers: only the compact yyyyMMdd form is a date.
    if try_get_i64(s).is_some() {
        // A leading sign also parses as an integer, so insist on eight ASCII
        // digits; this keeps the byte slices and the numeric parses below
        // from ever seeing a sign.
        if s.len() != 8 || !s.bytes().all(|b| b.is_ascii_digit()) {
            return None;
        }
        let year = s[0..4].parse::<u32>().ok()?;
        let month = s[4..6].parse::<u32>().ok()?;
        let day = s[6..8].parse::<u32>().ok()?;
        if !is_date(year, month, day) {
            return None;
        }
        let text = format!("{}-{}-{} 00:00:00", &s[0..4], &s[4..6], &s[6..8]);
        return Utc.datetime_from_str(&text, "%Y-%m-%d %H:%M:%S").ok();
    }

    // 2006-03-04, 2006-3-4, 2006/03/04, 2006/3/4, 2006.3.4, 2006.03.04,
    // 2006年3月4日 – each optionally followed by a time.
    // `|` inside the bracketed classes is a literal pipe, so it is also
    // accepted as a time separator (kept for backward compatibility).
    // NOTE(review): the pattern is compiled on every call; consider caching it
    // if this shows up in profiles.
    let re = Regex::new(
        r"(?x)
        ^(?P<year>\d{4})
        (-|/|\.|年)
        (?P<month>\d{1,2})
        (-|/|\.|月)
        (?P<day>\d{1,2})
        (
            [^0-9]+(?P<hour>\d{1,2})
            [:|时](?P<minute>\d{1,2})
            (
                [:|分](?P<second>\d{1,2})
                ([\.](?P<milli>\d{3}))?
            )?
        )?
        ",
    )
    .ok()?;
    let caps = re.captures(s)?;

    // Numeric value of a named group, if the group took part in the match.
    let field = |name: &str| -> Option<u32> {
        caps.name(name)
            .and_then(|m| m.as_str().parse::<u32>().ok())
    };

    let year = field("year")?;
    let month = field("month")?;
    let day = field("day")?;
    if !is_date(year, month, day) {
        return None;
    }

    // Start from midnight and refine with whichever time fields are valid.
    let (mut hour, mut minute, mut second, mut milli) = (0u32, 0u32, 0u32, 0u32);
    if let (Some(h), Some(m)) = (field("hour"), field("minute")) {
        if h < 24 && m < 60 {
            hour = h;
            minute = m;
            if let Some(sec) = field("second") {
                if sec < 60 {
                    second = sec;
                    milli = field("milli").unwrap_or(0);
                }
            }
        }
    }

    // Milliseconds must be zero-padded to three digits: "%.3f" reads ".13" as
    // 130 ms, so an unpadded ".013" would be off by a factor of ten.
    let text = format!(
        "{}-{}-{} {:02}:{:02}:{:02}.{:03}",
        year, month, day, hour, minute, second, milli
    );
    Utc.datetime_from_str(&text, "%Y-%m-%d %H:%M:%S%.3f").ok()
}

fn is_date(year: u32, month: u32, day: u32) -> bool {
    if month < 1 || month > 12 {
        false
    } else {
        match month {
            1 | 3 | 5 | 7 | 8 | 10 | 12 if day > 0 && day < 32 => true,
            4 | 6 | 9 | 11 if day > 0 && day < 31 => true,
            2 if is_leap_year(year) && day > 0 && day < 30 => true,
            2 if !is_leap_year(year) && day > 0 && day < 29 => true,
            _ => false,
        }
    }
}

/// Reports whether `year` is a leap year: divisible by 4, except that
/// century years count only when they are also divisible by 400.
fn is_leap_year(year: u32) -> bool {
    let divisible_by = |n: u32| year % n == 0;
    divisible_by(4) && (!divisible_by(100) || divisible_by(400))
}

#[cfg(test)]
mod tests {

    use super::*;

    /// Runs `detect_type` on each input, in order, and checks the result
    /// against the value paired with it.
    fn assert_detected(cases: Vec<(&str, DataType)>) {
        for (input, expected) in cases {
            assert_eq!(detect_type(input), expected);
        }
    }

    /// Expected `DateTime` value at whole-second precision.
    fn utc(y: i32, mo: u32, d: u32, h: u32, mi: u32, s: u32) -> DataType {
        DataType::DateTime(Utc.ymd(y, mo, d).and_hms(h, mi, s))
    }

    /// Expected plain-text value.
    fn text(s: &str) -> DataType {
        DataType::String(s.to_string())
    }

    #[test]
    fn bool_works() {
        assert_detected(vec![
            ("true", DataType::Bool(true)),
            ("True", DataType::Bool(true)),
            ("TRUE", DataType::Bool(true)),
            ("false", DataType::Bool(false)),
            ("False", DataType::Bool(false)),
            ("FALSE", DataType::Bool(false)),
        ]);
    }

    #[test]
    fn int_works() {
        assert_detected(vec![
            ("123", DataType::Int(123)),
            ("0123", DataType::Int(123)),
            ("465.0", DataType::Int(465)),
            ("-34.0", DataType::Int(-34)),
            ("-27", DataType::Int(-27)),
            ("000", DataType::Int(0)),
            ("0", DataType::Int(0)),
            ("0.0", DataType::Int(0)),
        ]);
    }

    #[test]
    fn float_works() {
        assert_detected(vec![
            ("123.1", DataType::Float(123.1)),
            ("0123.2", DataType::Float(123.2)),
            ("465.389", DataType::Float(465.389)),
            ("-34.2", DataType::Float(-34.2)),
            ("-27.99", DataType::Float(-27.99)),
            ("000.1", DataType::Float(0.1)),
            ("0.00001", DataType::Float(0.00001)),
            ("-.2", DataType::Float(-0.2)),
            (".324", DataType::Float(0.324)),
        ]);
    }

    #[test]
    fn datetime_works() {
        assert_detected(vec![
            // Date only.
            ("20220405", utc(2022, 4, 5, 0, 0, 0)),
            ("20221213", utc(2022, 12, 13, 0, 0, 0)),
            ("2022-03-04", utc(2022, 3, 4, 0, 0, 0)),
            ("2022-12-24", utc(2022, 12, 24, 0, 0, 0)),
            ("2022-1-13", utc(2022, 1, 13, 0, 0, 0)),
            ("2022-3-6", utc(2022, 3, 6, 0, 0, 0)),
            ("2022/03/06", utc(2022, 3, 6, 0, 0, 0)),
            ("2022/3/6", utc(2022, 3, 6, 0, 0, 0)),
            ("2022.03.06", utc(2022, 3, 6, 0, 0, 0)),
            ("2022.3.6", utc(2022, 3, 6, 0, 0, 0)),
            ("2022年03月06日", utc(2022, 3, 6, 0, 0, 0)),
            ("2022年3月6", utc(2022, 3, 6, 0, 0, 0)),
            // Date with time.
            ("2014-11-28T12:00:09Z", utc(2014, 11, 28, 12, 0, 9)),
            ("2022-03-04 13:04:05", utc(2022, 3, 4, 13, 4, 5)),
            ("2022-03-04 1:2:3", utc(2022, 3, 4, 1, 2, 3)),
            ("2022年03月04日 13:4:5", utc(2022, 3, 4, 13, 4, 5)),
            ("2022-03-04 13时04分05秒", utc(2022, 3, 4, 13, 4, 5)),
            ("2022年03月04日13:14", utc(2022, 3, 4, 13, 14, 0)),
            ("2022-03-04 13:25", utc(2022, 3, 4, 13, 25, 0)),
            ("2022-3-4 13:25", utc(2022, 3, 4, 13, 25, 0)),
            ("2022-3-4 1:5", utc(2022, 3, 4, 1, 5, 0)),
            ("2022-3-4 1:5:3", utc(2022, 3, 4, 1, 5, 3)),
            (
                "2022年03月04日13:14:15.123Z",
                DataType::DateTime(Utc.ymd(2022, 3, 4).and_hms_milli(13, 14, 15, 123)),
            ),
        ]);
    }

    #[test]
    fn string_works() {
        assert_detected(vec![
            ("fdsaf", text("fdsaf")),
            ("0.3213-", text("0.3213-")),
            ("2014-1111", text("2014-1111")),
            ("2014-13-11", text("2014-13-11")),
            ("2014-12-32", text("2014-12-32")),
            // NOTE(review): every time-related case below also uses day 32,
            // so each is rejected on the date alone and the time-range checks
            // are never exercised by this test.
            ("2014-12-32 24:00:00", text("2014-12-32 24:00:00")),
            ("2014-12-32 24:00", text("2014-12-32 24:00")),
            ("2014-12-32 23:60:00", text("2014-12-32 23:60:00")),
            ("2014-12-32 23:10:60", text("2014-12-32 23:10:60")),
            ("2014-12-32 23:60", text("2014-12-32 23:60")),
        ]);
    }
}