1use regex::Regex;
7use std::sync::LazyLock;
8
9static DATE_PATTERNS: LazyLock<Vec<Regex>> = LazyLock::new(|| {
12 vec![
13 Regex::new(r"^(19|20)\d{2}-(0[1-9]|1[0-2])-(0[1-9]|[12]\d|3[01])$").unwrap(),
15 Regex::new(r"^(19|20)\d{2}-(0[1-9]|1[0-2])-(0[1-9]|[12]\d|3[01])\s+\d{2}:\d{2}:\d{2}$").unwrap(),
17 Regex::new(r"^(19|20)\d{2}-(0[1-9]|1[0-2])-(0[1-9]|[12]\d|3[01])\s+\d{2}:\d{2}:\d{2}\.\d{1,3}$").unwrap(),
19 Regex::new(r"^(0[1-9]|1[0-2])/(0[1-9]|[12]\d|3[01])/(19|20)\d{2}$").unwrap(),
21 Regex::new(r"^(0[1-9]|[12]\d|3[01])/(0[1-9]|1[0-2])/(19|20)\d{2}$").unwrap(),
23 Regex::new(r"^(0[1-9]|[12]\d|3[01])/(0[1-9]|1[0-2])/(19|20)\d{2}\s+\d{2}:\d{2}:\d{2}$").unwrap(),
25 Regex::new(r"^(0[1-9]|[12]\d|3[01])/(0[1-9]|1[0-2])/(19|20)\d{2}\s+\d{2}:\d{2}:\d{2}\.\d{1,3}$").unwrap(),
27 Regex::new(r"^(0[1-9]|[12]\d|3[01])-(0[1-9]|1[0-2])-(19|20)\d{2}$").unwrap(),
29 Regex::new(r"^(19|20)\d{2}/(0[1-9]|1[0-2])/(0[1-9]|[12]\d|3[01])$").unwrap(),
31 Regex::new(r"^(19|20)\d{2}/(0[1-9]|1[0-2])/(0[1-9]|[12]\d|3[01])\s+\d{2}:\d{2}:\d{2}$").unwrap(),
33 Regex::new(r"^(19|20)\d{2}-(0[1-9]|1[0-2])-(0[1-9]|[12]\d|3[01])T\d{2}:\d{2}:\d{2}")
35 .unwrap(),
36 Regex::new(
38 r"^(19|20)\d{2}-(0[1-9]|1[0-2])-(0[1-9]|[12]\d|3[01])T\d{2}:\d{2}:\d{2}(\.\d+)?(Z|[+-]\d{2}:\d{2})$",
39 )
40 .unwrap(),
41 ]
42});
43
/// The kind of data a textual value (or a collection of values) appears to hold.
///
/// Variants are plain tags without payload, so the type is cheap to copy and
/// compare.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum InferredType {
    /// A truth value.
    Boolean,
    /// A whole number.
    Integer,
    /// A floating-point number.
    Float,
    /// A calendar date, optionally with a time of day.
    DateTime,
    /// Arbitrary text; the fallback when nothing more specific applies.
    String,
    /// No value at all.
    Null,
}
54
55pub struct TypeInference;
57
58impl TypeInference {
59 #[must_use]
64 pub fn infer_from_string(value: &str) -> InferredType {
65 if value.is_empty() {
67 return InferredType::Null;
68 }
69
70 if value.eq_ignore_ascii_case("true") || value.eq_ignore_ascii_case("false") {
72 return InferredType::Boolean;
73 }
74
75 if value.parse::<i64>().is_ok() {
77 return InferredType::Integer;
78 }
79
80 if value.parse::<f64>().is_ok() {
82 return InferredType::Float;
83 }
84
85 if Self::looks_like_datetime(value) {
88 return InferredType::DateTime;
89 }
90
91 InferredType::String
93 }
94
95 pub fn looks_like_datetime(value: &str) -> bool {
100 if value.len() < 8 || value.len() > 35 {
102 return false;
103 }
104
105 DATE_PATTERNS.iter().any(|pattern| pattern.is_match(value))
107 }
108
109 #[must_use]
119 pub fn merge_types(type1: InferredType, type2: InferredType) -> InferredType {
120 use InferredType::{Boolean, DateTime, Float, Integer, Null, String};
121
122 match (type1, type2) {
123 (t1, t2) if t1 == t2 => t1,
125
126 (Null, t) | (t, Null) => t,
128
129 (Integer, Float) | (Float, Integer) => Float,
131
132 (Boolean, _) | (_, Boolean) => String,
134
135 (DateTime, _) | (_, DateTime) => String,
137
138 _ => String,
140 }
141 }
142
143 pub fn infer_from_samples<'a, I>(values: I) -> InferredType
148 where
149 I: Iterator<Item = &'a str>,
150 {
151 let mut result_type = InferredType::Null;
152
153 for value in values {
154 let value_type = Self::infer_from_string(value);
155 result_type = Self::merge_types(result_type, value_type);
156
157 if result_type == InferredType::String {
159 break;
160 }
161 }
162
163 result_type
164 }
165
166 #[must_use]
168 pub fn can_coerce_to(value: &str, target_type: InferredType) -> bool {
169 match target_type {
170 InferredType::Boolean => {
171 value.eq_ignore_ascii_case("true")
172 || value.eq_ignore_ascii_case("false")
173 || value == "0"
174 || value == "1"
175 }
176 InferredType::Integer => value.parse::<i64>().is_ok(),
177 InferredType::Float => value.parse::<f64>().is_ok(),
178 InferredType::DateTime => Self::looks_like_datetime(value),
179 InferredType::String => true, InferredType::Null => value.is_empty(),
181 }
182 }
183}
184
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `TypeInference::infer_from_string` classifies every
    /// input as its paired expected type.
    fn assert_inferred(cases: &[(&str, InferredType)]) {
        for &(input, expected) in cases {
            assert_eq!(
                TypeInference::infer_from_string(input),
                expected,
                "input: {:?}",
                input
            );
        }
    }

    /// Asserts that merging the inferred types of `samples` gives `expected`.
    fn assert_samples(samples: &[&str], expected: InferredType) {
        assert_eq!(
            TypeInference::infer_from_samples(samples.iter().copied()),
            expected,
            "samples: {:?}",
            samples
        );
    }

    // One representative value per scalar type, plus the empty string.
    #[test]
    fn test_basic_type_inference() {
        assert_inferred(&[
            ("123", InferredType::Integer),
            ("123.45", InferredType::Float),
            ("true", InferredType::Boolean),
            ("FALSE", InferredType::Boolean),
            ("hello", InferredType::String),
            ("", InferredType::Null),
        ]);
    }

    // Several supported date and date-time layouts.
    #[test]
    fn test_datetime_detection() {
        assert_inferred(&[
            ("2024-01-15", InferredType::DateTime),
            ("01/15/2024", InferredType::DateTime),
            ("15-01-2024", InferredType::DateTime),
            ("2024-01-15T10:30:00", InferredType::DateTime),
            ("2024-01-15T10:30:00Z", InferredType::DateTime),
        ]);
    }

    // Dash-separated identifiers must stay plain strings.
    #[test]
    fn test_id_strings_not_detected_as_datetime() {
        assert_inferred(&[
            ("BQ-81198596", InferredType::String),
            ("ORDER-2024-001", InferredType::String),
            ("ID-123-456", InferredType::String),
            ("ABC-DEF-GHI", InferredType::String),
            ("2024-ABC-123", InferredType::String),
        ]);
    }

    // Out-of-range month or day components are rejected.
    #[test]
    fn test_invalid_dates_not_detected() {
        assert_inferred(&[
            ("2024-13-01", InferredType::String),
            ("2024-00-15", InferredType::String),
            ("2024-01-32", InferredType::String),
            ("2024-01-00", InferredType::String),
        ]);
    }

    // Each row is (left, right, expected merge result).
    #[test]
    fn test_type_merging() {
        use InferredType::{Boolean, DateTime, Float, Integer, Null, String};

        let cases = [
            // Identical types.
            (Integer, Integer, Integer),
            (String, String, String),
            // Null yields to the other side.
            (Null, Integer, Integer),
            (Float, Null, Float),
            // Numeric widening.
            (Integer, Float, Float),
            (Float, Integer, Float),
            // Incompatible pairs fall back to String.
            (Integer, String, String),
            (DateTime, Integer, String),
            (Boolean, Float, String),
        ];

        for &(left, right, expected) in &cases {
            assert_eq!(
                TypeInference::merge_types(left, right),
                expected,
                "merging {:?} with {:?}",
                left,
                right
            );
        }
    }

    #[test]
    fn test_infer_from_samples() {
        // Integers only.
        assert_samples(&["1", "2", "3", "4", "5"], InferredType::Integer);
        // Integers mixed with floats widen to Float.
        assert_samples(&["1", "2.5", "3", "4.0"], InferredType::Float);
        // A non-numeric value forces String.
        assert_samples(&["1", "hello", "3"], InferredType::String);
        // Empty values do not affect the result.
        assert_samples(&["", "1", "", "2", "3"], InferredType::Integer);
    }

    // Each row is (value, target type, expected coercibility).
    #[test]
    fn test_can_coerce() {
        let cases = [
            ("true", InferredType::Boolean, true),
            ("1", InferredType::Boolean, true),
            ("0", InferredType::Boolean, true),
            ("hello", InferredType::Boolean, false),
            ("123", InferredType::Integer, true),
            ("123.45", InferredType::Integer, false),
            ("hello", InferredType::Integer, false),
            ("123", InferredType::String, true),
            ("hello", InferredType::String, true),
            ("", InferredType::String, true),
        ];

        for &(value, target, expected) in &cases {
            assert_eq!(
                TypeInference::can_coerce_to(value, target),
                expected,
                "coercing {:?} to {:?}",
                value,
                target
            );
        }
    }
}