1use crate::datetime::common::*;
19use arrow::datatypes::DataType;
20use arrow::datatypes::DataType::*;
21use arrow::error::ArrowError::ParseError;
22use arrow::{array::types::Date32Type, compute::kernels::cast_utils::Parser};
23use datafusion_common::error::DataFusionError;
24use datafusion_common::{arrow_err, exec_err, internal_datafusion_err, Result};
25use datafusion_expr::{
26 ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
27};
28use datafusion_macros::user_doc;
29use std::any::Any;
30
31#[user_doc(
32 doc_section(label = "Time and Date Functions"),
33 description = r"Converts a value to a date (`YYYY-MM-DD`).
34Supports strings, integer and double types as input.
35Strings are parsed as YYYY-MM-DD (e.g. '2023-07-20') if no [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)s are provided.
36Integers and doubles are interpreted as days since the unix epoch (`1970-01-01T00:00:00Z`).
37Returns the corresponding date.
38
39Note: `to_date` returns Date32, which represents its values as the number of days since unix epoch(`1970-01-01`) stored as signed 32 bit value. The largest supported date value is `9999-12-31`.",
40 syntax_example = "to_date('2017-05-31', '%Y-%m-%d')",
41 sql_example = r#"```sql
42> select to_date('2023-01-31');
43+-------------------------------+
44| to_date(Utf8("2023-01-31")) |
45+-------------------------------+
46| 2023-01-31 |
47+-------------------------------+
48> select to_date('2023/01/31', '%Y-%m-%d', '%Y/%m/%d');
49+---------------------------------------------------------------------+
50| to_date(Utf8("2023/01/31"),Utf8("%Y-%m-%d"),Utf8("%Y/%m/%d")) |
51+---------------------------------------------------------------------+
52| 2023-01-31 |
53+---------------------------------------------------------------------+
54```
55
56Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_date.rs)
57"#,
58 standard_argument(name = "expression", prefix = "String"),
59 argument(
60 name = "format_n",
61 description = r"Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order
62 they appear with the first successful one being returned. If none of the formats successfully parse the expression
63 an error will be returned."
64 )
65)]
66#[derive(Debug)]
67pub struct ToDateFunc {
68 signature: Signature,
69}
70
71impl Default for ToDateFunc {
72 fn default() -> Self {
73 Self::new()
74 }
75}
76
77impl ToDateFunc {
78 pub fn new() -> Self {
79 Self {
80 signature: Signature::variadic_any(Volatility::Immutable),
81 }
82 }
83
84 fn to_date(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
85 match args.len() {
86 1 => handle::<Date32Type, _, Date32Type>(
87 args,
88 |s| match Date32Type::parse(s) {
89 Some(v) => Ok(v),
90 None => arrow_err!(ParseError(
91 "Unable to cast to Date32 for converting from i64 to i32 failed"
92 .to_string()
93 )),
94 },
95 "to_date",
96 ),
97 2.. => handle_multiple::<Date32Type, _, Date32Type, _>(
98 args,
99 |s, format| {
100 string_to_timestamp_millis_formatted(s, format)
101 .map(|n| n / (24 * 60 * 60 * 1_000))
102 .and_then(|v| {
103 v.try_into().map_err(|_| {
104 internal_datafusion_err!("Unable to cast to Date32 for converting from i64 to i32 failed")
105 })
106 })
107 },
108 |n| n,
109 "to_date",
110 ),
111 0 => exec_err!("Unsupported 0 argument count for function to_date"),
112 }
113 }
114}
115
116impl ScalarUDFImpl for ToDateFunc {
117 fn as_any(&self) -> &dyn Any {
118 self
119 }
120
121 fn name(&self) -> &str {
122 "to_date"
123 }
124
125 fn signature(&self) -> &Signature {
126 &self.signature
127 }
128
129 fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
130 Ok(Date32)
131 }
132
133 fn invoke_with_args(
134 &self,
135 args: datafusion_expr::ScalarFunctionArgs,
136 ) -> Result<ColumnarValue> {
137 let args = args.args;
138 if args.is_empty() {
139 return exec_err!("to_date function requires 1 or more arguments, got 0");
140 }
141
142 if args.len() > 1 {
144 validate_data_types(&args, "to_date")?;
145 }
146
147 match args[0].data_type() {
148 Int32 | Int64 | Null | Float64 | Date32 | Date64 => {
149 args[0].cast_to(&Date32, None)
150 }
151 Utf8View | LargeUtf8 | Utf8 => self.to_date(&args),
152 other => {
153 exec_err!("Unsupported data type {:?} for function to_date", other)
154 }
155 }
156 }
157
158 fn documentation(&self) -> Option<&Documentation> {
159 self.doc()
160 }
161}
162
163#[cfg(test)]
164mod tests {
165 use arrow::array::{Array, Date32Array, GenericStringArray, StringViewArray};
166 use arrow::datatypes::{DataType, Field};
167 use arrow::{compute::kernels::cast_utils::Parser, datatypes::Date32Type};
168 use datafusion_common::{DataFusionError, ScalarValue};
169 use datafusion_expr::{ColumnarValue, ScalarUDFImpl};
170 use std::sync::Arc;
171
172 use super::ToDateFunc;
173
174 fn invoke_to_date_with_args(
175 args: Vec<ColumnarValue>,
176 number_rows: usize,
177 ) -> Result<ColumnarValue, DataFusionError> {
178 let arg_fields = args
179 .iter()
180 .map(|arg| Field::new("a", arg.data_type(), true).into())
181 .collect::<Vec<_>>();
182
183 let args = datafusion_expr::ScalarFunctionArgs {
184 args,
185 arg_fields,
186 number_rows,
187 return_field: Field::new("f", DataType::Date32, true).into(),
188 };
189 ToDateFunc::new().invoke_with_args(args)
190 }
191
192 #[test]
193 fn test_to_date_without_format() {
194 struct TestCase {
195 name: &'static str,
196 date_str: &'static str,
197 }
198
199 let test_cases = vec![
200 TestCase {
201 name: "Largest four-digit year (9999)",
202 date_str: "9999-12-31",
203 },
204 TestCase {
205 name: "Year 1 (0001)",
206 date_str: "0001-12-31",
207 },
208 TestCase {
209 name: "Year before epoch (1969)",
210 date_str: "1969-01-01",
211 },
212 TestCase {
213 name: "Switch Julian/Gregorian calendar (1582-10-10)",
214 date_str: "1582-10-10",
215 },
216 ];
217
218 for tc in &test_cases {
219 test_scalar(ScalarValue::Utf8(Some(tc.date_str.to_string())), tc);
220 test_scalar(ScalarValue::LargeUtf8(Some(tc.date_str.to_string())), tc);
221 test_scalar(ScalarValue::Utf8View(Some(tc.date_str.to_string())), tc);
222
223 test_array::<GenericStringArray<i32>>(tc);
224 test_array::<GenericStringArray<i64>>(tc);
225 test_array::<StringViewArray>(tc);
226 }
227
228 fn test_scalar(sv: ScalarValue, tc: &TestCase) {
229 let to_date_result =
230 invoke_to_date_with_args(vec![ColumnarValue::Scalar(sv)], 1);
231
232 match to_date_result {
233 Ok(ColumnarValue::Scalar(ScalarValue::Date32(date_val))) => {
234 let expected = Date32Type::parse_formatted(tc.date_str, "%Y-%m-%d");
235 assert_eq!(
236 date_val, expected,
237 "{}: to_date created wrong value",
238 tc.name
239 );
240 }
241 _ => panic!("Could not convert '{}' to Date", tc.date_str),
242 }
243 }
244
245 fn test_array<A>(tc: &TestCase)
246 where
247 A: From<Vec<&'static str>> + Array + 'static,
248 {
249 let date_array = A::from(vec![tc.date_str]);
250 let batch_len = date_array.len();
251 let to_date_result = invoke_to_date_with_args(
252 vec![ColumnarValue::Array(Arc::new(date_array))],
253 batch_len,
254 );
255
256 match to_date_result {
257 Ok(ColumnarValue::Array(a)) => {
258 assert_eq!(a.len(), 1);
259
260 let expected = Date32Type::parse_formatted(tc.date_str, "%Y-%m-%d");
261 let mut builder = Date32Array::builder(4);
262 builder.append_value(expected.unwrap());
263
264 assert_eq!(
265 &builder.finish() as &dyn Array,
266 a.as_ref(),
267 "{}: to_date created wrong value",
268 tc.name
269 );
270 }
271 _ => panic!("Could not convert '{}' to Date", tc.date_str),
272 }
273 }
274 }
275
276 #[test]
277 fn test_to_date_with_format() {
278 struct TestCase {
279 name: &'static str,
280 date_str: &'static str,
281 format_str: &'static str,
282 formatted_date: &'static str,
283 }
284
285 let test_cases = vec![
286 TestCase {
287 name: "Largest four-digit year (9999)",
288 date_str: "9999-12-31",
289 format_str: "%Y%m%d",
290 formatted_date: "99991231",
291 },
292 TestCase {
293 name: "Smallest four-digit year (-9999)",
294 date_str: "-9999-12-31",
295 format_str: "%Y/%m/%d",
296 formatted_date: "-9999/12/31",
297 },
298 TestCase {
299 name: "Year 1 (0001)",
300 date_str: "0001-12-31",
301 format_str: "%Y%m%d",
302 formatted_date: "00011231",
303 },
304 TestCase {
305 name: "Year before epoch (1969)",
306 date_str: "1969-01-01",
307 format_str: "%Y%m%d",
308 formatted_date: "19690101",
309 },
310 TestCase {
311 name: "Switch Julian/Gregorian calendar (1582-10-10)",
312 date_str: "1582-10-10",
313 format_str: "%Y%m%d",
314 formatted_date: "15821010",
315 },
316 TestCase {
317 name: "Negative Year, BC (-42-01-01)",
318 date_str: "-42-01-01",
319 format_str: "%Y/%m/%d",
320 formatted_date: "-42/01/01",
321 },
322 ];
323
324 for tc in &test_cases {
325 test_scalar(ScalarValue::Utf8(Some(tc.formatted_date.to_string())), tc);
326 test_scalar(
327 ScalarValue::LargeUtf8(Some(tc.formatted_date.to_string())),
328 tc,
329 );
330 test_scalar(
331 ScalarValue::Utf8View(Some(tc.formatted_date.to_string())),
332 tc,
333 );
334
335 test_array::<GenericStringArray<i32>>(tc);
336 test_array::<GenericStringArray<i64>>(tc);
337 test_array::<StringViewArray>(tc);
338 }
339
340 fn test_scalar(sv: ScalarValue, tc: &TestCase) {
341 let format_scalar = ScalarValue::Utf8(Some(tc.format_str.to_string()));
342
343 let to_date_result = invoke_to_date_with_args(
344 vec![
345 ColumnarValue::Scalar(sv),
346 ColumnarValue::Scalar(format_scalar),
347 ],
348 1,
349 );
350
351 match to_date_result {
352 Ok(ColumnarValue::Scalar(ScalarValue::Date32(date_val))) => {
353 let expected = Date32Type::parse_formatted(tc.date_str, "%Y-%m-%d");
354 assert_eq!(date_val, expected, "{}: to_date created wrong value for date '{}' with format string '{}'", tc.name, tc.formatted_date, tc.format_str);
355 }
356 _ => panic!(
357 "Could not convert '{}' with format string '{}'to Date",
358 tc.date_str, tc.format_str
359 ),
360 }
361 }
362
363 fn test_array<A>(tc: &TestCase)
364 where
365 A: From<Vec<&'static str>> + Array + 'static,
366 {
367 let date_array = A::from(vec![tc.formatted_date]);
368 let format_array = A::from(vec![tc.format_str]);
369 let batch_len = date_array.len();
370
371 let to_date_result = invoke_to_date_with_args(
372 vec![
373 ColumnarValue::Array(Arc::new(date_array)),
374 ColumnarValue::Array(Arc::new(format_array)),
375 ],
376 batch_len,
377 );
378
379 match to_date_result {
380 Ok(ColumnarValue::Array(a)) => {
381 assert_eq!(a.len(), 1);
382
383 let expected = Date32Type::parse_formatted(tc.date_str, "%Y-%m-%d");
384 let mut builder = Date32Array::builder(4);
385 builder.append_value(expected.unwrap());
386
387 assert_eq!(
388 &builder.finish() as &dyn Array, a.as_ref(),
389 "{}: to_date created wrong value for date '{}' with format string '{}'",
390 tc.name,
391 tc.formatted_date,
392 tc.format_str
393 );
394 }
395 _ => panic!(
396 "Could not convert '{}' with format string '{}'to Date: {:?}",
397 tc.formatted_date, tc.format_str, to_date_result
398 ),
399 }
400 }
401 }
402
403 #[test]
404 fn test_to_date_multiple_format_strings() {
405 let formatted_date_scalar = ScalarValue::Utf8(Some("2023/01/31".into()));
406 let format1_scalar = ScalarValue::Utf8(Some("%Y-%m-%d".into()));
407 let format2_scalar = ScalarValue::Utf8(Some("%Y/%m/%d".into()));
408
409 let to_date_result = invoke_to_date_with_args(
410 vec![
411 ColumnarValue::Scalar(formatted_date_scalar),
412 ColumnarValue::Scalar(format1_scalar),
413 ColumnarValue::Scalar(format2_scalar),
414 ],
415 1,
416 );
417
418 match to_date_result {
419 Ok(ColumnarValue::Scalar(ScalarValue::Date32(date_val))) => {
420 let expected = Date32Type::parse_formatted("2023-01-31", "%Y-%m-%d");
421 assert_eq!(
422 date_val, expected,
423 "to_date created wrong value for date with 2 format strings"
424 );
425 }
426 _ => panic!("Conversion failed",),
427 }
428 }
429
430 #[test]
431 fn test_to_date_from_timestamp() {
432 let test_cases = vec![
433 "2020-09-08T13:42:29Z",
434 "2020-09-08T13:42:29.190855-05:00",
435 "2020-09-08 12:13:29",
436 ];
437 for date_str in test_cases {
438 let formatted_date_scalar = ScalarValue::Utf8(Some(date_str.into()));
439
440 let to_date_result = invoke_to_date_with_args(
441 vec![ColumnarValue::Scalar(formatted_date_scalar)],
442 1,
443 );
444
445 match to_date_result {
446 Ok(ColumnarValue::Scalar(ScalarValue::Date32(date_val))) => {
447 let expected = Date32Type::parse_formatted("2020-09-08", "%Y-%m-%d");
448 assert_eq!(date_val, expected, "to_date created wrong value");
449 }
450 _ => panic!("Conversion of {date_str} failed"),
451 }
452 }
453 }
454
455 #[test]
456 fn test_to_date_string_with_valid_number() {
457 let date_str = "20241231";
458 let date_scalar = ScalarValue::Utf8(Some(date_str.into()));
459
460 let to_date_result =
461 invoke_to_date_with_args(vec![ColumnarValue::Scalar(date_scalar)], 1);
462
463 match to_date_result {
464 Ok(ColumnarValue::Scalar(ScalarValue::Date32(date_val))) => {
465 let expected = Date32Type::parse_formatted("2024-12-31", "%Y-%m-%d");
466 assert_eq!(
467 date_val, expected,
468 "to_date created wrong value for {date_str}"
469 );
470 }
471 _ => panic!("Conversion of {date_str} failed"),
472 }
473 }
474
475 #[test]
476 fn test_to_date_string_with_invalid_number() {
477 let date_str = "202412311";
478 let date_scalar = ScalarValue::Utf8(Some(date_str.into()));
479
480 let to_date_result =
481 invoke_to_date_with_args(vec![ColumnarValue::Scalar(date_scalar)], 1);
482
483 if let Ok(ColumnarValue::Scalar(ScalarValue::Date32(_))) = to_date_result {
484 panic!("Conversion of {date_str} succeeded, but should have failed. ");
485 }
486 }
487}