1use chrono::prelude::*;
2use regex::Regex;
3use serde::Serialize;
4use std::cmp::PartialEq;
5use std::fmt;
6use std::mem;
7
8#[derive(Serialize)]
9#[serde(untagged, rename_all = "camelCase")]
10pub enum DataType {
11 String(String),
12 Int(i64),
13 Float(f64),
14 Bool(bool),
15 DateTime(chrono::DateTime<Utc>),
16}
17
18impl fmt::Debug for DataType {
19 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
20 match self {
21 DataType::String(s) => write!(f, "String({})", s),
22 DataType::Int(i) => write!(f, "Int({})", i),
23 DataType::Float(fl) => write!(f, "Float({})", fl),
24 DataType::Bool(b) => write!(f, "Bool({})", b),
25 DataType::DateTime(d) => write!(f, "DateTime({})", d),
26 }
27 }
28}
29
30impl PartialEq for DataType {
31 fn eq(&self, other: &Self) -> bool {
32 if mem::discriminant(self) != mem::discriminant(other) {
33 return false;
34 }
35 return match (self, other) {
36 (DataType::String(v), DataType::String(v1)) => {
37 return v == v1;
38 }
39 (DataType::Int(v), DataType::Int(v1)) => {
40 return v == v1;
41 }
42 (DataType::Float(v), DataType::Float(v1)) => {
43 return v == v1;
44 }
45 (DataType::Bool(v), DataType::Bool(v1)) => {
46 return v == v1;
47 }
48 (DataType::DateTime(v), DataType::DateTime(v1)) => {
49 return v == v1;
50 }
51 _ => false,
52 };
53 }
54}
55
56pub fn detect_type(s: &str) -> DataType {
57 if let Some(b) = try_get_bool(s) {
58 DataType::Bool(b)
59 } else if let Some(f) = try_get_f64(s) {
60 if f.fract() == 0.0 {
61 if s.len() == 8 {
62 if let Some(d) = try_get_datetime(s) {
63 DataType::DateTime(d)
64 } else {
65 DataType::Int(f as i64)
66 }
67 } else {
68 DataType::Int(f as i64)
69 }
70 } else {
71 DataType::Float(f)
72 }
73 } else if let Some(d) = try_get_datetime(s.clone()) {
74 DataType::DateTime(d)
75 } else {
76 DataType::String(s.to_string())
77 }
78}
79
/// Parses `s` as a boolean, ignoring letter case.
///
/// Returns `Some(true)` for `"true"`, `Some(false)` for `"false"` (in any
/// mix of upper and lower case) and `None` for everything else. Surrounding
/// whitespace is not trimmed.
fn try_get_bool(s: &str) -> Option<bool> {
    // Compare in place instead of allocating a lowercased copy per call.
    if s.eq_ignore_ascii_case("true") {
        Some(true)
    } else if s.eq_ignore_ascii_case("false") {
        Some(false)
    } else {
        None
    }
}
87
/// Parses `s` as an `f64`, yielding `None` when the text is not a valid
/// float literal.
fn try_get_f64(s: &str) -> Option<f64> {
    s.parse::<f64>().ok()
}
95
/// Parses `s` as an `i64`, yielding `None` when the text is not a valid
/// integer literal or does not fit in 64 bits.
fn try_get_i64(s: &str) -> Option<i64> {
    s.parse::<i64>().ok()
}
103
104fn try_get_datetime(s: &str) -> Option<DateTime<Utc>> {
105 if let Some(_) = try_get_i64(s) {
107 match s.len() {
109 8 => {
111 let y = &s[0..4].parse::<u32>().unwrap();
112 let m = &s[4..6].parse::<u32>().unwrap();
113 let d = &s[6..8].parse::<u32>().unwrap();
114 if is_date(*y, *m, *d) {
115 if let Ok(dt) = Utc.datetime_from_str(
116 format!("{}-{}-{} 00:00:00", &s[0..4], &s[4..6], &s[6..8]).as_str(),
117 "%Y-%m-%d %H:%M:%S",
118 ) {
119 Some(dt)
120 } else {
121 None
122 }
123 } else {
124 None
125 }
126 }
127 _ => None,
128 }
129 } else {
130 if let Ok(re) = Regex::new(
132 r"(?x)
133^(?P<year>\d{4})
134(-|/|\.|年)
135(?P<month>\d{1,2})
136(-|/|\.|月)
137(?P<day>\d{1,2})
138(
139 [^0-9]+(?P<hour>\d{1,2})
140 [:|时](?P<minute>\d{1,2})
141 (
142 [:|分](?P<second>\d{1,2})
143 ([\.](?P<milli>\d{3}))?
144 )?
145)?
146",
147 ) {
148 if let Some(caps) = re.captures(&s) {
149 if let Ok(year) = caps["year"].parse::<u32>() {
150 if let Ok(month) = caps["month"].parse::<u32>() {
151 if let Ok(day) = caps["day"].parse::<u32>() {
152 if is_date(year, month, day) {
153 let mut f = format!("{}-{}-{} 00:00:00.000", year, month, day);
154 if let Some(_) = caps.name("hour") {
155 if let Some(_) = caps.name("minute") {
156 if let Ok(hour) = caps["hour"].parse::<u32>() {
157 if let Ok(minute) = caps["minute"].parse::<u32>() {
158 if hour < 24 && minute < 60 {
159 f = format!(
160 "{}-{}-{} {}:{}:00.000",
161 year, month, day, hour, minute
162 );
163 if let Some(_) = caps.name("second") {
164 if let Ok(second) =
165 caps["second"].parse::<u32>()
166 {
167 if second < 60 {
168 f = format!(
169 "{}-{}-{} {}:{}:{}.000",
170 year,
171 month,
172 day,
173 hour,
174 minute,
175 second
176 );
177 if let Some(_) = caps.name("milli")
178 {
179 if let Ok(milli) =
180 caps["milli"].parse::<u32>()
181 {
182 f = format!(
183 "{}-{}-{} {}:{}:{}.{}",
184 year,
185 month,
186 day,
187 hour,
188 minute,
189 second,
190 milli
191 )
192 }
193 }
194 }
195 }
196 }
197 }
198 }
199 }
200 }
201 }
202 if let Ok(dt) =
203 Utc.datetime_from_str(f.as_str(), "%Y-%m-%d %H:%M:%S%.3f")
204 {
205 return Some(dt);
206 }
207 }
208 }
209 }
210 }
211 }
212 }
213 None
214 }
215}
216
217fn is_date(year: u32, month: u32, day: u32) -> bool {
218 if month < 1 || month > 12 {
219 false
220 } else {
221 match month {
222 1 | 3 | 5 | 7 | 8 | 10 | 12 if day > 0 && day < 32 => true,
223 4 | 6 | 9 | 11 if day > 0 && day < 31 => true,
224 2 if is_leap_year(year) && day > 0 && day < 30 => true,
225 2 if !is_leap_year(year) && day > 0 && day < 29 => true,
226 _ => false,
227 }
228 }
229}
230
/// Reports whether `year` is a leap year: divisible by 4, except century
/// years, which must also be divisible by 400.
fn is_leap_year(year: u32) -> bool {
    let divisible_by = |n: u32| year % n == 0;
    divisible_by(4) && (!divisible_by(100) || divisible_by(400))
}
246
#[cfg(test)]
mod tests {

    use super::*;

    /// Runs `detect_type` on each input and compares the result with the
    /// expectation paired with it, naming the offending input on failure.
    fn assert_detects(cases: Vec<(&str, DataType)>) {
        for (input, expected) in cases {
            assert_eq!(detect_type(input), expected, "input: {:?}", input);
        }
    }

    /// Expected `DateTime` value at whole-second precision, in UTC.
    fn utc(year: i32, month: u32, day: u32, hour: u32, minute: u32, second: u32) -> DataType {
        DataType::DateTime(Utc.ymd(year, month, day).and_hms(hour, minute, second))
    }

    /// Text that must be returned unchanged as a `String`.
    fn text(s: &str) -> (&str, DataType) {
        (s, DataType::String(s.to_string()))
    }

    #[test]
    fn bool_works() {
        assert_detects(vec![
            ("true", DataType::Bool(true)),
            ("True", DataType::Bool(true)),
            ("TRUE", DataType::Bool(true)),
            ("false", DataType::Bool(false)),
            ("False", DataType::Bool(false)),
            ("FALSE", DataType::Bool(false)),
        ]);
    }

    #[test]
    fn int_works() {
        assert_detects(vec![
            ("123", DataType::Int(123)),
            ("0123", DataType::Int(123)),
            ("465.0", DataType::Int(465)),
            ("-34.0", DataType::Int(-34)),
            ("-27", DataType::Int(-27)),
            ("000", DataType::Int(0)),
            ("0", DataType::Int(0)),
            ("0.0", DataType::Int(0)),
        ]);
    }

    #[test]
    fn float_works() {
        assert_detects(vec![
            ("123.1", DataType::Float(123.1)),
            ("0123.2", DataType::Float(123.2)),
            ("465.389", DataType::Float(465.389)),
            ("-34.2", DataType::Float(-34.2)),
            ("-27.99", DataType::Float(-27.99)),
            ("000.1", DataType::Float(0.1)),
            ("0.00001", DataType::Float(0.00001)),
            ("-.2", DataType::Float(-0.2)),
            (".324", DataType::Float(0.324)),
        ]);
    }

    #[test]
    fn datetime_works() {
        assert_detects(vec![
            // Compact YYYYMMDD.
            ("20220405", utc(2022, 4, 5, 0, 0, 0)),
            ("20221213", utc(2022, 12, 13, 0, 0, 0)),
            // Date only, various separators and zero padding.
            ("2022-03-04", utc(2022, 3, 4, 0, 0, 0)),
            ("2022-12-24", utc(2022, 12, 24, 0, 0, 0)),
            ("2022-1-13", utc(2022, 1, 13, 0, 0, 0)),
            ("2022-3-6", utc(2022, 3, 6, 0, 0, 0)),
            ("2022/03/06", utc(2022, 3, 6, 0, 0, 0)),
            ("2022/3/6", utc(2022, 3, 6, 0, 0, 0)),
            ("2022.03.06", utc(2022, 3, 6, 0, 0, 0)),
            ("2022.3.6", utc(2022, 3, 6, 0, 0, 0)),
            ("2022年03月06日", utc(2022, 3, 6, 0, 0, 0)),
            ("2022年3月6", utc(2022, 3, 6, 0, 0, 0)),
            // Date with time of day.
            ("2014-11-28T12:00:09Z", utc(2014, 11, 28, 12, 0, 9)),
            ("2022-03-04 13:04:05", utc(2022, 3, 4, 13, 4, 5)),
            ("2022-03-04 1:2:3", utc(2022, 3, 4, 1, 2, 3)),
            ("2022年03月04日 13:4:5", utc(2022, 3, 4, 13, 4, 5)),
            ("2022-03-04 13时04分05秒", utc(2022, 3, 4, 13, 4, 5)),
            ("2022年03月04日13:14", utc(2022, 3, 4, 13, 14, 0)),
            ("2022-03-04 13:25", utc(2022, 3, 4, 13, 25, 0)),
            ("2022-3-4 13:25", utc(2022, 3, 4, 13, 25, 0)),
            ("2022-3-4 1:5", utc(2022, 3, 4, 1, 5, 0)),
            ("2022-3-4 1:5:3", utc(2022, 3, 4, 1, 5, 3)),
            // Millisecond precision.
            (
                "2022年03月04日13:14:15.123Z",
                DataType::DateTime(Utc.ymd(2022, 3, 4).and_hms_milli(13, 14, 15, 123)),
            ),
        ]);
    }

    #[test]
    fn string_works() {
        assert_detects(vec![
            text("fdsaf"),
            text("0.3213-"),
            text("2014-1111"),
            text("2014-13-11"),
            text("2014-12-32"),
            text("2014-12-32 24:00:00"),
            text("2014-12-32 24:00"),
            text("2014-12-32 23:60:00"),
            text("2014-12-32 23:10:60"),
            text("2014-12-32 23:60"),
        ]);
    }
}