1use regex::Regex;
7use std::sync::LazyLock;
8
9static DATE_PATTERNS: LazyLock<Vec<Regex>> = LazyLock::new(|| {
12 vec![
13 Regex::new(r"^(19|20)\d{2}-(0[1-9]|1[0-2])-(0[1-9]|[12]\d|3[01])$").unwrap(),
15 Regex::new(r"^(0[1-9]|1[0-2])/(0[1-9]|[12]\d|3[01])/(19|20)\d{2}$").unwrap(),
17 Regex::new(r"^(0[1-9]|[12]\d|3[01])/(0[1-9]|1[0-2])/(19|20)\d{2}$").unwrap(),
19 Regex::new(r"^(0[1-9]|[12]\d|3[01])-(0[1-9]|1[0-2])-(19|20)\d{2}$").unwrap(),
21 Regex::new(r"^(19|20)\d{2}/(0[1-9]|1[0-2])/(0[1-9]|[12]\d|3[01])$").unwrap(),
23 Regex::new(r"^(19|20)\d{2}-(0[1-9]|1[0-2])-(0[1-9]|[12]\d|3[01])T\d{2}:\d{2}:\d{2}")
25 .unwrap(),
26 Regex::new(
28 r"^(19|20)\d{2}-(0[1-9]|1[0-2])-(0[1-9]|[12]\d|3[01])T\d{2}:\d{2}:\d{2}(\.\d+)?(Z|[+-]\d{2}:\d{2})$",
29 )
30 .unwrap(),
31 ]
32});
33
/// The data type guessed for a raw string value, or for a whole set of
/// sampled values.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum InferredType {
    /// The literal `true` or `false`, in any ASCII casing.
    Boolean,
    /// A value that parses as a signed 64-bit integer.
    Integer,
    /// A value that parses as a 64-bit float but not as an integer.
    Float,
    /// A value matching one of the recognised date / date-time layouts.
    DateTime,
    /// Free-form text; also the result of merging incompatible types.
    String,
    /// An empty value. Merging `Null` with any other type yields that type.
    Null,
}
44
45pub struct TypeInference;
47
48impl TypeInference {
49 pub fn infer_from_string(value: &str) -> InferredType {
54 if value.is_empty() {
56 return InferredType::Null;
57 }
58
59 if value.eq_ignore_ascii_case("true") || value.eq_ignore_ascii_case("false") {
61 return InferredType::Boolean;
62 }
63
64 if value.parse::<i64>().is_ok() {
66 return InferredType::Integer;
67 }
68
69 if value.parse::<f64>().is_ok() {
71 return InferredType::Float;
72 }
73
74 if Self::looks_like_datetime(value) {
77 return InferredType::DateTime;
78 }
79
80 InferredType::String
82 }
83
84 pub fn looks_like_datetime(value: &str) -> bool {
89 if value.len() < 8 || value.len() > 35 {
91 return false;
92 }
93
94 DATE_PATTERNS.iter().any(|pattern| pattern.is_match(value))
96 }
97
98 pub fn merge_types(type1: InferredType, type2: InferredType) -> InferredType {
108 use InferredType::*;
109
110 match (type1, type2) {
111 (t1, t2) if t1 == t2 => t1,
113
114 (Null, t) | (t, Null) => t,
116
117 (Integer, Float) | (Float, Integer) => Float,
119
120 (Boolean, _) | (_, Boolean) => String,
122
123 (DateTime, _) | (_, DateTime) => String,
125
126 _ => String,
128 }
129 }
130
131 pub fn infer_from_samples<'a, I>(values: I) -> InferredType
136 where
137 I: Iterator<Item = &'a str>,
138 {
139 let mut result_type = InferredType::Null;
140
141 for value in values {
142 let value_type = Self::infer_from_string(value);
143 result_type = Self::merge_types(result_type, value_type);
144
145 if result_type == InferredType::String {
147 break;
148 }
149 }
150
151 result_type
152 }
153
154 pub fn can_coerce_to(value: &str, target_type: InferredType) -> bool {
156 match target_type {
157 InferredType::Boolean => {
158 value.eq_ignore_ascii_case("true")
159 || value.eq_ignore_ascii_case("false")
160 || value == "0"
161 || value == "1"
162 }
163 InferredType::Integer => value.parse::<i64>().is_ok(),
164 InferredType::Float => value.parse::<f64>().is_ok(),
165 InferredType::DateTime => Self::looks_like_datetime(value),
166 InferredType::String => true, InferredType::Null => value.is_empty(),
168 }
169 }
170}
171
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks that each `(input, expected)` pair is classified as expected by
    /// `TypeInference::infer_from_string`.
    fn assert_inferred(cases: &[(&str, InferredType)]) {
        for &(input, expected) in cases {
            assert_eq!(
                TypeInference::infer_from_string(input),
                expected,
                "unexpected type inferred for {:?}",
                input
            );
        }
    }

    #[test]
    fn test_basic_type_inference() {
        assert_inferred(&[
            ("123", InferredType::Integer),
            ("123.45", InferredType::Float),
            ("true", InferredType::Boolean),
            ("FALSE", InferredType::Boolean),
            ("hello", InferredType::String),
            ("", InferredType::Null),
        ]);
    }

    #[test]
    fn test_datetime_detection() {
        assert_inferred(&[
            ("2024-01-15", InferredType::DateTime),
            ("01/15/2024", InferredType::DateTime),
            ("15-01-2024", InferredType::DateTime),
            ("2024-01-15T10:30:00", InferredType::DateTime),
            ("2024-01-15T10:30:00Z", InferredType::DateTime),
        ]);
    }

    #[test]
    fn test_id_strings_not_detected_as_datetime() {
        // Identifiers with dashes and digits must stay plain strings.
        assert_inferred(&[
            ("BQ-81198596", InferredType::String),
            ("ORDER-2024-001", InferredType::String),
            ("ID-123-456", InferredType::String),
            ("ABC-DEF-GHI", InferredType::String),
            ("2024-ABC-123", InferredType::String),
        ]);
    }

    #[test]
    fn test_invalid_dates_not_detected() {
        // Month 13, month 00, day 32 and day 00 respectively.
        assert_inferred(&[
            ("2024-13-01", InferredType::String),
            ("2024-00-15", InferredType::String),
            ("2024-01-32", InferredType::String),
            ("2024-01-00", InferredType::String),
        ]);
    }

    #[test]
    fn test_type_merging() {
        use InferredType::*;

        let cases = [
            // Identical types are preserved.
            (Integer, Integer, Integer),
            (String, String, String),
            // Null gives way to the other type.
            (Null, Integer, Integer),
            (Float, Null, Float),
            // Numeric widening.
            (Integer, Float, Float),
            (Float, Integer, Float),
            // Incompatible pairs fall back to String.
            (Integer, String, String),
            (DateTime, Integer, String),
            (Boolean, Float, String),
        ];

        for &(left, right, expected) in &cases {
            assert_eq!(
                TypeInference::merge_types(left, right),
                expected,
                "unexpected merge result for ({:?}, {:?})",
                left,
                right
            );
        }
    }

    #[test]
    fn test_infer_from_samples() {
        let cases: [(&[&str], InferredType); 4] = [
            (&["1", "2", "3", "4", "5"], InferredType::Integer),
            (&["1", "2.5", "3", "4.0"], InferredType::Float),
            (&["1", "hello", "3"], InferredType::String),
            // Empty values are ignored when other samples carry a type.
            (&["", "1", "", "2", "3"], InferredType::Integer),
        ];

        for &(samples, expected) in &cases {
            assert_eq!(
                TypeInference::infer_from_samples(samples.iter().copied()),
                expected,
                "unexpected type inferred for samples {:?}",
                samples
            );
        }
    }

    #[test]
    fn test_can_coerce() {
        let accepted = [
            ("true", InferredType::Boolean),
            ("1", InferredType::Boolean),
            ("0", InferredType::Boolean),
            ("123", InferredType::Integer),
            ("123", InferredType::String),
            ("hello", InferredType::String),
            ("", InferredType::String),
        ];
        for &(value, target) in &accepted {
            assert!(
                TypeInference::can_coerce_to(value, target),
                "{:?} should coerce to {:?}",
                value,
                target
            );
        }

        let rejected = [
            ("hello", InferredType::Boolean),
            ("123.45", InferredType::Integer),
            ("hello", InferredType::Integer),
        ];
        for &(value, target) in &rejected {
            assert!(
                !TypeInference::can_coerce_to(value, target),
                "{:?} should not coerce to {:?}",
                value,
                target
            );
        }
    }
}