1use crate::datetime::common::*;
19use arrow::compute::cast_with_options;
20use arrow::datatypes::DataType;
21use arrow::datatypes::DataType::*;
22use arrow::error::ArrowError::ParseError;
23use arrow::{array::types::Date32Type, compute::kernels::cast_utils::Parser};
24use datafusion_common::format::DEFAULT_CAST_OPTIONS;
25use datafusion_common::{Result, arrow_err, exec_err, internal_datafusion_err};
26use datafusion_expr::{
27 ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl, Signature,
28 Volatility,
29};
30use datafusion_macros::user_doc;
31
32#[user_doc(
33 doc_section(label = "Time and Date Functions"),
34 description = r"Converts a value to a date (`YYYY-MM-DD`).
35Supports strings, numeric and timestamp types as input.
36Strings are parsed as YYYY-MM-DD (e.g. '2023-07-20') if no [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)s are provided.
37Integers and doubles are interpreted as days since the unix epoch (`1970-01-01T00:00:00Z`).
38Returns the corresponding date.
39
40Note: `to_date` returns Date32, which represents its values as the number of days since unix epoch(`1970-01-01`) stored as signed 32 bit value. The largest supported date value is `9999-12-31`.",
41 syntax_example = "to_date('2017-05-31', '%Y-%m-%d')",
42 sql_example = r#"```sql
43> select to_date('2023-01-31');
44+-------------------------------+
45| to_date(Utf8("2023-01-31")) |
46+-------------------------------+
47| 2023-01-31 |
48+-------------------------------+
49> select to_date('2023/01/31', '%Y-%m-%d', '%Y/%m/%d');
50+---------------------------------------------------------------------+
51| to_date(Utf8("2023/01/31"),Utf8("%Y-%m-%d"),Utf8("%Y/%m/%d")) |
52+---------------------------------------------------------------------+
53| 2023-01-31 |
54+---------------------------------------------------------------------+
55```
56
57Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/builtin_functions/date_time.rs)
58"#,
59 standard_argument(name = "expression", prefix = "String"),
60 argument(
61 name = "format_n",
62 description = r"Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order
63 they appear with the first successful one being returned. If none of the formats successfully parse the expression
64 an error will be returned."
65 )
66)]
67#[derive(Debug, PartialEq, Eq, Hash)]
68pub struct ToDateFunc {
69 signature: Signature,
70}
71
72impl Default for ToDateFunc {
73 fn default() -> Self {
74 Self::new()
75 }
76}
77
78impl ToDateFunc {
79 pub fn new() -> Self {
80 Self {
81 signature: Signature::variadic_any(Volatility::Immutable),
82 }
83 }
84
85 fn to_date(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
86 match args.len() {
87 1 => handle::<Date32Type, _>(
88 args,
89 |s| match Date32Type::parse(s) {
90 Some(v) => Ok(v),
91 None => arrow_err!(ParseError(
92 "Unable to cast to Date32 for converting from i64 to i32 failed"
93 .to_string()
94 )),
95 },
96 "to_date",
97 &Date32,
98 ),
99 2.. => handle_multiple::<Date32Type, _, _>(
100 args,
101 |s, format| {
102 string_to_timestamp_millis_formatted(s, format)
103 .map(|n| n / (24 * 60 * 60 * 1_000))
104 .and_then(|v| {
105 v.try_into().map_err(|_| {
106 internal_datafusion_err!("Unable to cast to Date32 for converting from i64 to i32 failed")
107 })
108 })
109 },
110 |n| n,
111 "to_date",
112 &Date32,
113 ),
114 0 => exec_err!("Unsupported 0 argument count for function to_date"),
115 }
116 }
117}
118
119impl ScalarUDFImpl for ToDateFunc {
120 fn name(&self) -> &str {
121 "to_date"
122 }
123
124 fn signature(&self) -> &Signature {
125 &self.signature
126 }
127
128 fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
129 Ok(Date32)
130 }
131
132 fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
133 let args = args.args;
134 if args.is_empty() {
135 return exec_err!("to_date function requires 1 or more arguments, got 0");
136 }
137
138 if args.len() > 1 {
140 validate_data_types(&args, "to_date")?;
141 }
142
143 match args[0].data_type() {
144 Null | Int32 | Int64 | Date32 | Date64 | Timestamp(_, _) => {
145 args[0].cast_to(&Date32, None)
146 }
147 UInt8 | UInt16 | UInt32 | UInt64 | Int8 | Int16 => {
148 match &args[0] {
153 ColumnarValue::Array(array) => {
154 Ok(ColumnarValue::Array(cast_with_options(
155 &cast_with_options(&array, &Int32, &DEFAULT_CAST_OPTIONS)?,
156 &Date32,
157 &DEFAULT_CAST_OPTIONS,
158 )?))
159 }
160 ColumnarValue::Scalar(scalar) => {
161 let sv =
162 scalar.cast_to_with_options(&Int32, &DEFAULT_CAST_OPTIONS)?;
163 Ok(ColumnarValue::Scalar(
164 sv.cast_to_with_options(&Date32, &DEFAULT_CAST_OPTIONS)?,
165 ))
166 }
167 }
168 }
169 Float16
170 | Float32
171 | Float64
172 | Decimal32(_, _)
173 | Decimal64(_, _)
174 | Decimal128(_, _)
175 | Decimal256(_, _) => {
176 args[0].cast_to(&Int64, None)?.cast_to(&Date32, None)
179 }
180 Utf8View | LargeUtf8 | Utf8 => self.to_date(&args),
181 other => {
182 exec_err!("Unsupported data type {} for function to_date", other)
183 }
184 }
185 }
186
187 fn documentation(&self) -> Option<&Documentation> {
188 self.doc()
189 }
190}
191
192#[cfg(test)]
193mod tests {
194 use super::ToDateFunc;
195 use arrow::array::{Array, Date32Array, GenericStringArray, StringViewArray};
196 use arrow::datatypes::{DataType, Field};
197 use arrow::{compute::kernels::cast_utils::Parser, datatypes::Date32Type};
198 use datafusion_common::config::ConfigOptions;
199 use datafusion_common::{DataFusionError, ScalarValue};
200 use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl};
201 use std::sync::Arc;
202
203 fn invoke_to_date_with_args(
204 args: Vec<ColumnarValue>,
205 number_rows: usize,
206 ) -> Result<ColumnarValue, DataFusionError> {
207 let arg_fields = args
208 .iter()
209 .map(|arg| Field::new("a", arg.data_type(), true).into())
210 .collect::<Vec<_>>();
211
212 let args = ScalarFunctionArgs {
213 args,
214 arg_fields,
215 number_rows,
216 return_field: Field::new("f", DataType::Date32, true).into(),
217 config_options: Arc::new(ConfigOptions::default()),
218 };
219 ToDateFunc::new().invoke_with_args(args)
220 }
221
222 #[test]
223 fn test_to_date_without_format() {
224 struct TestCase {
225 name: &'static str,
226 date_str: &'static str,
227 }
228
229 let test_cases = vec![
230 TestCase {
231 name: "Largest four-digit year (9999)",
232 date_str: "9999-12-31",
233 },
234 TestCase {
235 name: "Year 1 (0001)",
236 date_str: "0001-12-31",
237 },
238 TestCase {
239 name: "Year before epoch (1969)",
240 date_str: "1969-01-01",
241 },
242 TestCase {
243 name: "Switch Julian/Gregorian calendar (1582-10-10)",
244 date_str: "1582-10-10",
245 },
246 ];
247
248 for tc in &test_cases {
249 test_scalar(ScalarValue::Utf8(Some(tc.date_str.to_string())), tc);
250 test_scalar(ScalarValue::LargeUtf8(Some(tc.date_str.to_string())), tc);
251 test_scalar(ScalarValue::Utf8View(Some(tc.date_str.to_string())), tc);
252
253 test_array::<GenericStringArray<i32>>(tc);
254 test_array::<GenericStringArray<i64>>(tc);
255 test_array::<StringViewArray>(tc);
256 }
257
258 fn test_scalar(sv: ScalarValue, tc: &TestCase) {
259 let to_date_result =
260 invoke_to_date_with_args(vec![ColumnarValue::Scalar(sv)], 1);
261
262 match to_date_result {
263 Ok(ColumnarValue::Scalar(ScalarValue::Date32(date_val))) => {
264 let expected = Date32Type::parse_formatted(tc.date_str, "%Y-%m-%d");
265 assert_eq!(
266 date_val, expected,
267 "{}: to_date created wrong value",
268 tc.name
269 );
270 }
271 _ => panic!("Could not convert '{}' to Date", tc.date_str),
272 }
273 }
274
275 fn test_array<A>(tc: &TestCase)
276 where
277 A: From<Vec<&'static str>> + Array + 'static,
278 {
279 let date_array = A::from(vec![tc.date_str]);
280 let batch_len = date_array.len();
281 let to_date_result = invoke_to_date_with_args(
282 vec![ColumnarValue::Array(Arc::new(date_array))],
283 batch_len,
284 );
285
286 match to_date_result {
287 Ok(ColumnarValue::Array(a)) => {
288 assert_eq!(a.len(), 1);
289
290 let expected = Date32Type::parse_formatted(tc.date_str, "%Y-%m-%d");
291 let mut builder = Date32Array::builder(4);
292 builder.append_value(expected.unwrap());
293
294 assert_eq!(
295 &builder.finish() as &dyn Array,
296 a.as_ref(),
297 "{}: to_date created wrong value",
298 tc.name
299 );
300 }
301 _ => panic!("Could not convert '{}' to Date", tc.date_str),
302 }
303 }
304 }
305
306 #[test]
307 fn test_to_date_with_format() {
308 struct TestCase {
309 name: &'static str,
310 date_str: &'static str,
311 format_str: &'static str,
312 formatted_date: &'static str,
313 }
314
315 let test_cases = vec![
316 TestCase {
317 name: "Largest four-digit year (9999)",
318 date_str: "9999-12-31",
319 format_str: "%Y%m%d",
320 formatted_date: "99991231",
321 },
322 TestCase {
323 name: "Smallest four-digit year (-9999)",
324 date_str: "-9999-12-31",
325 format_str: "%Y/%m/%d",
326 formatted_date: "-9999/12/31",
327 },
328 TestCase {
329 name: "Year 1 (0001)",
330 date_str: "0001-12-31",
331 format_str: "%Y%m%d",
332 formatted_date: "00011231",
333 },
334 TestCase {
335 name: "Year before epoch (1969)",
336 date_str: "1969-01-01",
337 format_str: "%Y%m%d",
338 formatted_date: "19690101",
339 },
340 TestCase {
341 name: "Switch Julian/Gregorian calendar (1582-10-10)",
342 date_str: "1582-10-10",
343 format_str: "%Y%m%d",
344 formatted_date: "15821010",
345 },
346 TestCase {
347 name: "Negative Year, BC (-42-01-01)",
348 date_str: "-42-01-01",
349 format_str: "%Y/%m/%d",
350 formatted_date: "-42/01/01",
351 },
352 ];
353
354 for tc in &test_cases {
355 test_scalar(ScalarValue::Utf8(Some(tc.formatted_date.to_string())), tc);
356 test_scalar(
357 ScalarValue::LargeUtf8(Some(tc.formatted_date.to_string())),
358 tc,
359 );
360 test_scalar(
361 ScalarValue::Utf8View(Some(tc.formatted_date.to_string())),
362 tc,
363 );
364
365 test_array::<GenericStringArray<i32>>(tc);
366 test_array::<GenericStringArray<i64>>(tc);
367 test_array::<StringViewArray>(tc);
368 }
369
370 fn test_scalar(sv: ScalarValue, tc: &TestCase) {
371 let format_scalar = ScalarValue::Utf8(Some(tc.format_str.to_string()));
372
373 let to_date_result = invoke_to_date_with_args(
374 vec![
375 ColumnarValue::Scalar(sv),
376 ColumnarValue::Scalar(format_scalar),
377 ],
378 1,
379 );
380
381 match to_date_result {
382 Ok(ColumnarValue::Scalar(ScalarValue::Date32(date_val))) => {
383 let expected = Date32Type::parse_formatted(tc.date_str, "%Y-%m-%d");
384 assert_eq!(
385 date_val, expected,
386 "{}: to_date created wrong value for date '{}' with format string '{}'",
387 tc.name, tc.formatted_date, tc.format_str
388 );
389 }
390 _ => panic!(
391 "Could not convert '{}' with format string '{}'to Date",
392 tc.date_str, tc.format_str
393 ),
394 }
395 }
396
397 fn test_array<A>(tc: &TestCase)
398 where
399 A: From<Vec<&'static str>> + Array + 'static,
400 {
401 let date_array = A::from(vec![tc.formatted_date]);
402 let format_array = A::from(vec![tc.format_str]);
403 let batch_len = date_array.len();
404
405 let to_date_result = invoke_to_date_with_args(
406 vec![
407 ColumnarValue::Array(Arc::new(date_array)),
408 ColumnarValue::Array(Arc::new(format_array)),
409 ],
410 batch_len,
411 );
412
413 match to_date_result {
414 Ok(ColumnarValue::Array(a)) => {
415 assert_eq!(a.len(), 1);
416
417 let expected = Date32Type::parse_formatted(tc.date_str, "%Y-%m-%d");
418 let mut builder = Date32Array::builder(4);
419 builder.append_value(expected.unwrap());
420
421 assert_eq!(
422 &builder.finish() as &dyn Array,
423 a.as_ref(),
424 "{}: to_date created wrong value for date '{}' with format string '{}'",
425 tc.name,
426 tc.formatted_date,
427 tc.format_str
428 );
429 }
430 _ => panic!(
431 "Could not convert '{}' with format string '{}'to Date: {:?}",
432 tc.formatted_date, tc.format_str, to_date_result
433 ),
434 }
435 }
436 }
437
438 #[test]
439 fn test_to_date_multiple_format_strings() {
440 let formatted_date_scalar = ScalarValue::Utf8(Some("2023/01/31".into()));
441 let format1_scalar = ScalarValue::Utf8(Some("%Y-%m-%d".into()));
442 let format2_scalar = ScalarValue::Utf8(Some("%Y/%m/%d".into()));
443
444 let to_date_result = invoke_to_date_with_args(
445 vec![
446 ColumnarValue::Scalar(formatted_date_scalar),
447 ColumnarValue::Scalar(format1_scalar),
448 ColumnarValue::Scalar(format2_scalar),
449 ],
450 1,
451 );
452
453 match to_date_result {
454 Ok(ColumnarValue::Scalar(ScalarValue::Date32(date_val))) => {
455 let expected = Date32Type::parse_formatted("2023-01-31", "%Y-%m-%d");
456 assert_eq!(
457 date_val, expected,
458 "to_date created wrong value for date with 2 format strings"
459 );
460 }
461 _ => panic!("Conversion failed",),
462 }
463 }
464
465 #[test]
466 fn test_to_date_from_timestamp() {
467 let test_cases = vec![
468 "2020-09-08T13:42:29Z",
469 "2020-09-08T13:42:29.190855-05:00",
470 "2020-09-08 12:13:29",
471 ];
472 for date_str in test_cases {
473 let formatted_date_scalar = ScalarValue::Utf8(Some(date_str.into()));
474
475 let to_date_result = invoke_to_date_with_args(
476 vec![ColumnarValue::Scalar(formatted_date_scalar)],
477 1,
478 );
479
480 match to_date_result {
481 Ok(ColumnarValue::Scalar(ScalarValue::Date32(date_val))) => {
482 let expected = Date32Type::parse_formatted("2020-09-08", "%Y-%m-%d");
483 assert_eq!(date_val, expected, "to_date created wrong value");
484 }
485 _ => panic!("Conversion of {date_str} failed"),
486 }
487 }
488 }
489
490 #[test]
491 fn test_to_date_string_with_valid_number() {
492 let date_str = "20241231";
493 let date_scalar = ScalarValue::Utf8(Some(date_str.into()));
494
495 let to_date_result =
496 invoke_to_date_with_args(vec![ColumnarValue::Scalar(date_scalar)], 1);
497
498 match to_date_result {
499 Ok(ColumnarValue::Scalar(ScalarValue::Date32(date_val))) => {
500 let expected = Date32Type::parse_formatted("2024-12-31", "%Y-%m-%d");
501 assert_eq!(
502 date_val, expected,
503 "to_date created wrong value for {date_str}"
504 );
505 }
506 _ => panic!("Conversion of {date_str} failed"),
507 }
508 }
509
510 #[test]
511 fn test_to_date_string_with_invalid_number() {
512 let date_str = "202412311";
513 let date_scalar = ScalarValue::Utf8(Some(date_str.into()));
514
515 let to_date_result =
516 invoke_to_date_with_args(vec![ColumnarValue::Scalar(date_scalar)], 1);
517
518 if let Ok(ColumnarValue::Scalar(ScalarValue::Date32(_))) = to_date_result {
519 panic!("Conversion of {date_str} succeeded, but should have failed. ");
520 }
521 }
522}