1use regex::Regex;
7use std::sync::LazyLock;
8
9static DATE_PATTERNS: LazyLock<Vec<Regex>> = LazyLock::new(|| {
12 vec![
13 Regex::new(r"^(19|20)\d{2}-(0[1-9]|1[0-2])-(0[1-9]|[12]\d|3[01])$").unwrap(),
15 Regex::new(r"^(0[1-9]|1[0-2])/(0[1-9]|[12]\d|3[01])/(19|20)\d{2}$").unwrap(),
17 Regex::new(r"^(0[1-9]|[12]\d|3[01])/(0[1-9]|1[0-2])/(19|20)\d{2}$").unwrap(),
19 Regex::new(r"^(0[1-9]|[12]\d|3[01])-(0[1-9]|1[0-2])-(19|20)\d{2}$").unwrap(),
21 Regex::new(r"^(19|20)\d{2}/(0[1-9]|1[0-2])/(0[1-9]|[12]\d|3[01])$").unwrap(),
23 Regex::new(r"^(19|20)\d{2}-(0[1-9]|1[0-2])-(0[1-9]|[12]\d|3[01])T\d{2}:\d{2}:\d{2}")
25 .unwrap(),
26 Regex::new(
28 r"^(19|20)\d{2}-(0[1-9]|1[0-2])-(0[1-9]|[12]\d|3[01])T\d{2}:\d{2}:\d{2}(\.\d+)?(Z|[+-]\d{2}:\d{2})$",
29 )
30 .unwrap(),
31 ]
32});
33
34#[derive(Debug, Clone, Copy, PartialEq, Eq)]
36pub enum InferredType {
37 Boolean,
38 Integer,
39 Float,
40 DateTime,
41 String,
42 Null,
43}
44
45pub struct TypeInference;
47
48impl TypeInference {
49 #[must_use]
54 pub fn infer_from_string(value: &str) -> InferredType {
55 if value.is_empty() {
57 return InferredType::Null;
58 }
59
60 if value.eq_ignore_ascii_case("true") || value.eq_ignore_ascii_case("false") {
62 return InferredType::Boolean;
63 }
64
65 if value.parse::<i64>().is_ok() {
67 return InferredType::Integer;
68 }
69
70 if value.parse::<f64>().is_ok() {
72 return InferredType::Float;
73 }
74
75 if Self::looks_like_datetime(value) {
78 return InferredType::DateTime;
79 }
80
81 InferredType::String
83 }
84
85 pub fn looks_like_datetime(value: &str) -> bool {
90 if value.len() < 8 || value.len() > 35 {
92 return false;
93 }
94
95 DATE_PATTERNS.iter().any(|pattern| pattern.is_match(value))
97 }
98
99 #[must_use]
109 pub fn merge_types(type1: InferredType, type2: InferredType) -> InferredType {
110 use InferredType::{Boolean, DateTime, Float, Integer, Null, String};
111
112 match (type1, type2) {
113 (t1, t2) if t1 == t2 => t1,
115
116 (Null, t) | (t, Null) => t,
118
119 (Integer, Float) | (Float, Integer) => Float,
121
122 (Boolean, _) | (_, Boolean) => String,
124
125 (DateTime, _) | (_, DateTime) => String,
127
128 _ => String,
130 }
131 }
132
133 pub fn infer_from_samples<'a, I>(values: I) -> InferredType
138 where
139 I: Iterator<Item = &'a str>,
140 {
141 let mut result_type = InferredType::Null;
142
143 for value in values {
144 let value_type = Self::infer_from_string(value);
145 result_type = Self::merge_types(result_type, value_type);
146
147 if result_type == InferredType::String {
149 break;
150 }
151 }
152
153 result_type
154 }
155
156 #[must_use]
158 pub fn can_coerce_to(value: &str, target_type: InferredType) -> bool {
159 match target_type {
160 InferredType::Boolean => {
161 value.eq_ignore_ascii_case("true")
162 || value.eq_ignore_ascii_case("false")
163 || value == "0"
164 || value == "1"
165 }
166 InferredType::Integer => value.parse::<i64>().is_ok(),
167 InferredType::Float => value.parse::<f64>().is_ok(),
168 InferredType::DateTime => Self::looks_like_datetime(value),
169 InferredType::String => true, InferredType::Null => value.is_empty(),
171 }
172 }
173}
174
175#[cfg(test)]
176mod tests {
177 use super::*;
178
179 #[test]
180 fn test_basic_type_inference() {
181 assert_eq!(
182 TypeInference::infer_from_string("123"),
183 InferredType::Integer
184 );
185 assert_eq!(
186 TypeInference::infer_from_string("123.45"),
187 InferredType::Float
188 );
189 assert_eq!(
190 TypeInference::infer_from_string("true"),
191 InferredType::Boolean
192 );
193 assert_eq!(
194 TypeInference::infer_from_string("FALSE"),
195 InferredType::Boolean
196 );
197 assert_eq!(
198 TypeInference::infer_from_string("hello"),
199 InferredType::String
200 );
201 assert_eq!(TypeInference::infer_from_string(""), InferredType::Null);
202 }
203
204 #[test]
205 fn test_datetime_detection() {
206 assert_eq!(
208 TypeInference::infer_from_string("2024-01-15"),
209 InferredType::DateTime
210 );
211 assert_eq!(
212 TypeInference::infer_from_string("01/15/2024"),
213 InferredType::DateTime
214 );
215 assert_eq!(
216 TypeInference::infer_from_string("15-01-2024"),
217 InferredType::DateTime
218 );
219 assert_eq!(
220 TypeInference::infer_from_string("2024-01-15T10:30:00"),
221 InferredType::DateTime
222 );
223 assert_eq!(
224 TypeInference::infer_from_string("2024-01-15T10:30:00Z"),
225 InferredType::DateTime
226 );
227 }
228
229 #[test]
230 fn test_id_strings_not_detected_as_datetime() {
231 assert_eq!(
233 TypeInference::infer_from_string("BQ-81198596"),
234 InferredType::String
235 );
236 assert_eq!(
237 TypeInference::infer_from_string("ORDER-2024-001"),
238 InferredType::String
239 );
240 assert_eq!(
241 TypeInference::infer_from_string("ID-123-456"),
242 InferredType::String
243 );
244 assert_eq!(
245 TypeInference::infer_from_string("ABC-DEF-GHI"),
246 InferredType::String
247 );
248 assert_eq!(
249 TypeInference::infer_from_string("2024-ABC-123"),
250 InferredType::String
251 );
252 }
253
254 #[test]
255 fn test_invalid_dates_not_detected() {
256 assert_eq!(
258 TypeInference::infer_from_string("2024-13-01"), InferredType::String
260 );
261 assert_eq!(
262 TypeInference::infer_from_string("2024-00-15"), InferredType::String
264 );
265 assert_eq!(
266 TypeInference::infer_from_string("2024-01-32"), InferredType::String
268 );
269 assert_eq!(
270 TypeInference::infer_from_string("2024-01-00"), InferredType::String
272 );
273 }
274
275 #[test]
276 fn test_type_merging() {
277 use InferredType::*;
278
279 assert_eq!(TypeInference::merge_types(Integer, Integer), Integer);
281 assert_eq!(TypeInference::merge_types(String, String), String);
282
283 assert_eq!(TypeInference::merge_types(Null, Integer), Integer);
285 assert_eq!(TypeInference::merge_types(Float, Null), Float);
286
287 assert_eq!(TypeInference::merge_types(Integer, Float), Float);
289 assert_eq!(TypeInference::merge_types(Float, Integer), Float);
290
291 assert_eq!(TypeInference::merge_types(Integer, String), String);
293 assert_eq!(TypeInference::merge_types(DateTime, Integer), String);
294 assert_eq!(TypeInference::merge_types(Boolean, Float), String);
295 }
296
297 #[test]
298 fn test_infer_from_samples() {
299 let samples = vec!["1", "2", "3", "4", "5"];
301 assert_eq!(
302 TypeInference::infer_from_samples(samples.into_iter()),
303 InferredType::Integer
304 );
305
306 let samples = vec!["1", "2.5", "3", "4.0"];
308 assert_eq!(
309 TypeInference::infer_from_samples(samples.into_iter()),
310 InferredType::Float
311 );
312
313 let samples = vec!["1", "hello", "3"];
315 assert_eq!(
316 TypeInference::infer_from_samples(samples.into_iter()),
317 InferredType::String
318 );
319
320 let samples = vec!["", "1", "", "2", "3"];
322 assert_eq!(
323 TypeInference::infer_from_samples(samples.into_iter()),
324 InferredType::Integer
325 );
326 }
327
328 #[test]
329 fn test_can_coerce() {
330 assert!(TypeInference::can_coerce_to("true", InferredType::Boolean));
332 assert!(TypeInference::can_coerce_to("1", InferredType::Boolean));
333 assert!(TypeInference::can_coerce_to("0", InferredType::Boolean));
334 assert!(!TypeInference::can_coerce_to(
335 "hello",
336 InferredType::Boolean
337 ));
338
339 assert!(TypeInference::can_coerce_to("123", InferredType::Integer));
341 assert!(!TypeInference::can_coerce_to(
342 "123.45",
343 InferredType::Integer
344 ));
345 assert!(!TypeInference::can_coerce_to(
346 "hello",
347 InferredType::Integer
348 ));
349
350 assert!(TypeInference::can_coerce_to("123", InferredType::String));
352 assert!(TypeInference::can_coerce_to("hello", InferredType::String));
353 assert!(TypeInference::can_coerce_to("", InferredType::String));
354 }
355}