1use crate::datetime::common::*;
19use arrow::compute::cast_with_options;
20use arrow::datatypes::DataType;
21use arrow::datatypes::DataType::*;
22use arrow::error::ArrowError::ParseError;
23use arrow::{array::types::Date32Type, compute::kernels::cast_utils::Parser};
24use datafusion_common::format::DEFAULT_CAST_OPTIONS;
25use datafusion_common::{Result, arrow_err, exec_err, internal_datafusion_err};
26use datafusion_expr::{
27 ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
28};
29use datafusion_macros::user_doc;
30use std::any::Any;
31
32#[user_doc(
33 doc_section(label = "Time and Date Functions"),
34 description = r"Converts a value to a date (`YYYY-MM-DD`).
35Supports strings, numeric and timestamp types as input.
36Strings are parsed as YYYY-MM-DD (e.g. '2023-07-20') if no [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)s are provided.
37Integers and doubles are interpreted as days since the unix epoch (`1970-01-01T00:00:00Z`).
38Returns the corresponding date.
39
40Note: `to_date` returns Date32, which represents its values as the number of days since unix epoch(`1970-01-01`) stored as signed 32 bit value. The largest supported date value is `9999-12-31`.",
41 syntax_example = "to_date('2017-05-31', '%Y-%m-%d')",
42 sql_example = r#"```sql
43> select to_date('2023-01-31');
44+-------------------------------+
45| to_date(Utf8("2023-01-31")) |
46+-------------------------------+
47| 2023-01-31 |
48+-------------------------------+
49> select to_date('2023/01/31', '%Y-%m-%d', '%Y/%m/%d');
50+---------------------------------------------------------------------+
51| to_date(Utf8("2023/01/31"),Utf8("%Y-%m-%d"),Utf8("%Y/%m/%d")) |
52+---------------------------------------------------------------------+
53| 2023-01-31 |
54+---------------------------------------------------------------------+
55```
56
57Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/builtin_functions/date_time.rs)
58"#,
59 standard_argument(name = "expression", prefix = "String"),
60 argument(
61 name = "format_n",
62 description = r"Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order
63 they appear with the first successful one being returned. If none of the formats successfully parse the expression
64 an error will be returned."
65 )
66)]
67#[derive(Debug, PartialEq, Eq, Hash)]
68pub struct ToDateFunc {
69 signature: Signature,
70}
71
72impl Default for ToDateFunc {
73 fn default() -> Self {
74 Self::new()
75 }
76}
77
78impl ToDateFunc {
79 pub fn new() -> Self {
80 Self {
81 signature: Signature::variadic_any(Volatility::Immutable),
82 }
83 }
84
85 fn to_date(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
86 match args.len() {
87 1 => handle::<Date32Type, _>(
88 args,
89 |s| match Date32Type::parse(s) {
90 Some(v) => Ok(v),
91 None => arrow_err!(ParseError(
92 "Unable to cast to Date32 for converting from i64 to i32 failed"
93 .to_string()
94 )),
95 },
96 "to_date",
97 &Date32,
98 ),
99 2.. => handle_multiple::<Date32Type, _, _>(
100 args,
101 |s, format| {
102 string_to_timestamp_millis_formatted(s, format)
103 .map(|n| n / (24 * 60 * 60 * 1_000))
104 .and_then(|v| {
105 v.try_into().map_err(|_| {
106 internal_datafusion_err!("Unable to cast to Date32 for converting from i64 to i32 failed")
107 })
108 })
109 },
110 |n| n,
111 "to_date",
112 &Date32,
113 ),
114 0 => exec_err!("Unsupported 0 argument count for function to_date"),
115 }
116 }
117}
118
119impl ScalarUDFImpl for ToDateFunc {
120 fn as_any(&self) -> &dyn Any {
121 self
122 }
123
124 fn name(&self) -> &str {
125 "to_date"
126 }
127
128 fn signature(&self) -> &Signature {
129 &self.signature
130 }
131
132 fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
133 Ok(Date32)
134 }
135
136 fn invoke_with_args(
137 &self,
138 args: datafusion_expr::ScalarFunctionArgs,
139 ) -> Result<ColumnarValue> {
140 let args = args.args;
141 if args.is_empty() {
142 return exec_err!("to_date function requires 1 or more arguments, got 0");
143 }
144
145 if args.len() > 1 {
147 validate_data_types(&args, "to_date")?;
148 }
149
150 match args[0].data_type() {
151 Null | Int32 | Int64 | Date32 | Date64 | Timestamp(_, _) => {
152 args[0].cast_to(&Date32, None)
153 }
154 UInt8 | UInt16 | UInt32 | UInt64 | Int8 | Int16 => {
155 match &args[0] {
160 ColumnarValue::Array(array) => {
161 Ok(ColumnarValue::Array(cast_with_options(
162 &cast_with_options(&array, &Int32, &DEFAULT_CAST_OPTIONS)?,
163 &Date32,
164 &DEFAULT_CAST_OPTIONS,
165 )?))
166 }
167 ColumnarValue::Scalar(scalar) => {
168 let sv =
169 scalar.cast_to_with_options(&Int32, &DEFAULT_CAST_OPTIONS)?;
170 Ok(ColumnarValue::Scalar(
171 sv.cast_to_with_options(&Date32, &DEFAULT_CAST_OPTIONS)?,
172 ))
173 }
174 }
175 }
176 Float16
177 | Float32
178 | Float64
179 | Decimal32(_, _)
180 | Decimal64(_, _)
181 | Decimal128(_, _)
182 | Decimal256(_, _) => {
183 args[0].cast_to(&Int64, None)?.cast_to(&Date32, None)
186 }
187 Utf8View | LargeUtf8 | Utf8 => self.to_date(&args),
188 other => {
189 exec_err!("Unsupported data type {} for function to_date", other)
190 }
191 }
192 }
193
194 fn documentation(&self) -> Option<&Documentation> {
195 self.doc()
196 }
197}
198
199#[cfg(test)]
200mod tests {
201 use super::ToDateFunc;
202 use arrow::array::{Array, Date32Array, GenericStringArray, StringViewArray};
203 use arrow::datatypes::{DataType, Field};
204 use arrow::{compute::kernels::cast_utils::Parser, datatypes::Date32Type};
205 use datafusion_common::config::ConfigOptions;
206 use datafusion_common::{DataFusionError, ScalarValue};
207 use datafusion_expr::{ColumnarValue, ScalarUDFImpl};
208 use std::sync::Arc;
209
210 fn invoke_to_date_with_args(
211 args: Vec<ColumnarValue>,
212 number_rows: usize,
213 ) -> Result<ColumnarValue, DataFusionError> {
214 let arg_fields = args
215 .iter()
216 .map(|arg| Field::new("a", arg.data_type(), true).into())
217 .collect::<Vec<_>>();
218
219 let args = datafusion_expr::ScalarFunctionArgs {
220 args,
221 arg_fields,
222 number_rows,
223 return_field: Field::new("f", DataType::Date32, true).into(),
224 config_options: Arc::new(ConfigOptions::default()),
225 };
226 ToDateFunc::new().invoke_with_args(args)
227 }
228
229 #[test]
230 fn test_to_date_without_format() {
231 struct TestCase {
232 name: &'static str,
233 date_str: &'static str,
234 }
235
236 let test_cases = vec![
237 TestCase {
238 name: "Largest four-digit year (9999)",
239 date_str: "9999-12-31",
240 },
241 TestCase {
242 name: "Year 1 (0001)",
243 date_str: "0001-12-31",
244 },
245 TestCase {
246 name: "Year before epoch (1969)",
247 date_str: "1969-01-01",
248 },
249 TestCase {
250 name: "Switch Julian/Gregorian calendar (1582-10-10)",
251 date_str: "1582-10-10",
252 },
253 ];
254
255 for tc in &test_cases {
256 test_scalar(ScalarValue::Utf8(Some(tc.date_str.to_string())), tc);
257 test_scalar(ScalarValue::LargeUtf8(Some(tc.date_str.to_string())), tc);
258 test_scalar(ScalarValue::Utf8View(Some(tc.date_str.to_string())), tc);
259
260 test_array::<GenericStringArray<i32>>(tc);
261 test_array::<GenericStringArray<i64>>(tc);
262 test_array::<StringViewArray>(tc);
263 }
264
265 fn test_scalar(sv: ScalarValue, tc: &TestCase) {
266 let to_date_result =
267 invoke_to_date_with_args(vec![ColumnarValue::Scalar(sv)], 1);
268
269 match to_date_result {
270 Ok(ColumnarValue::Scalar(ScalarValue::Date32(date_val))) => {
271 let expected = Date32Type::parse_formatted(tc.date_str, "%Y-%m-%d");
272 assert_eq!(
273 date_val, expected,
274 "{}: to_date created wrong value",
275 tc.name
276 );
277 }
278 _ => panic!("Could not convert '{}' to Date", tc.date_str),
279 }
280 }
281
282 fn test_array<A>(tc: &TestCase)
283 where
284 A: From<Vec<&'static str>> + Array + 'static,
285 {
286 let date_array = A::from(vec![tc.date_str]);
287 let batch_len = date_array.len();
288 let to_date_result = invoke_to_date_with_args(
289 vec![ColumnarValue::Array(Arc::new(date_array))],
290 batch_len,
291 );
292
293 match to_date_result {
294 Ok(ColumnarValue::Array(a)) => {
295 assert_eq!(a.len(), 1);
296
297 let expected = Date32Type::parse_formatted(tc.date_str, "%Y-%m-%d");
298 let mut builder = Date32Array::builder(4);
299 builder.append_value(expected.unwrap());
300
301 assert_eq!(
302 &builder.finish() as &dyn Array,
303 a.as_ref(),
304 "{}: to_date created wrong value",
305 tc.name
306 );
307 }
308 _ => panic!("Could not convert '{}' to Date", tc.date_str),
309 }
310 }
311 }
312
313 #[test]
314 fn test_to_date_with_format() {
315 struct TestCase {
316 name: &'static str,
317 date_str: &'static str,
318 format_str: &'static str,
319 formatted_date: &'static str,
320 }
321
322 let test_cases = vec![
323 TestCase {
324 name: "Largest four-digit year (9999)",
325 date_str: "9999-12-31",
326 format_str: "%Y%m%d",
327 formatted_date: "99991231",
328 },
329 TestCase {
330 name: "Smallest four-digit year (-9999)",
331 date_str: "-9999-12-31",
332 format_str: "%Y/%m/%d",
333 formatted_date: "-9999/12/31",
334 },
335 TestCase {
336 name: "Year 1 (0001)",
337 date_str: "0001-12-31",
338 format_str: "%Y%m%d",
339 formatted_date: "00011231",
340 },
341 TestCase {
342 name: "Year before epoch (1969)",
343 date_str: "1969-01-01",
344 format_str: "%Y%m%d",
345 formatted_date: "19690101",
346 },
347 TestCase {
348 name: "Switch Julian/Gregorian calendar (1582-10-10)",
349 date_str: "1582-10-10",
350 format_str: "%Y%m%d",
351 formatted_date: "15821010",
352 },
353 TestCase {
354 name: "Negative Year, BC (-42-01-01)",
355 date_str: "-42-01-01",
356 format_str: "%Y/%m/%d",
357 formatted_date: "-42/01/01",
358 },
359 ];
360
361 for tc in &test_cases {
362 test_scalar(ScalarValue::Utf8(Some(tc.formatted_date.to_string())), tc);
363 test_scalar(
364 ScalarValue::LargeUtf8(Some(tc.formatted_date.to_string())),
365 tc,
366 );
367 test_scalar(
368 ScalarValue::Utf8View(Some(tc.formatted_date.to_string())),
369 tc,
370 );
371
372 test_array::<GenericStringArray<i32>>(tc);
373 test_array::<GenericStringArray<i64>>(tc);
374 test_array::<StringViewArray>(tc);
375 }
376
377 fn test_scalar(sv: ScalarValue, tc: &TestCase) {
378 let format_scalar = ScalarValue::Utf8(Some(tc.format_str.to_string()));
379
380 let to_date_result = invoke_to_date_with_args(
381 vec![
382 ColumnarValue::Scalar(sv),
383 ColumnarValue::Scalar(format_scalar),
384 ],
385 1,
386 );
387
388 match to_date_result {
389 Ok(ColumnarValue::Scalar(ScalarValue::Date32(date_val))) => {
390 let expected = Date32Type::parse_formatted(tc.date_str, "%Y-%m-%d");
391 assert_eq!(
392 date_val, expected,
393 "{}: to_date created wrong value for date '{}' with format string '{}'",
394 tc.name, tc.formatted_date, tc.format_str
395 );
396 }
397 _ => panic!(
398 "Could not convert '{}' with format string '{}'to Date",
399 tc.date_str, tc.format_str
400 ),
401 }
402 }
403
404 fn test_array<A>(tc: &TestCase)
405 where
406 A: From<Vec<&'static str>> + Array + 'static,
407 {
408 let date_array = A::from(vec![tc.formatted_date]);
409 let format_array = A::from(vec![tc.format_str]);
410 let batch_len = date_array.len();
411
412 let to_date_result = invoke_to_date_with_args(
413 vec![
414 ColumnarValue::Array(Arc::new(date_array)),
415 ColumnarValue::Array(Arc::new(format_array)),
416 ],
417 batch_len,
418 );
419
420 match to_date_result {
421 Ok(ColumnarValue::Array(a)) => {
422 assert_eq!(a.len(), 1);
423
424 let expected = Date32Type::parse_formatted(tc.date_str, "%Y-%m-%d");
425 let mut builder = Date32Array::builder(4);
426 builder.append_value(expected.unwrap());
427
428 assert_eq!(
429 &builder.finish() as &dyn Array,
430 a.as_ref(),
431 "{}: to_date created wrong value for date '{}' with format string '{}'",
432 tc.name,
433 tc.formatted_date,
434 tc.format_str
435 );
436 }
437 _ => panic!(
438 "Could not convert '{}' with format string '{}'to Date: {:?}",
439 tc.formatted_date, tc.format_str, to_date_result
440 ),
441 }
442 }
443 }
444
445 #[test]
446 fn test_to_date_multiple_format_strings() {
447 let formatted_date_scalar = ScalarValue::Utf8(Some("2023/01/31".into()));
448 let format1_scalar = ScalarValue::Utf8(Some("%Y-%m-%d".into()));
449 let format2_scalar = ScalarValue::Utf8(Some("%Y/%m/%d".into()));
450
451 let to_date_result = invoke_to_date_with_args(
452 vec![
453 ColumnarValue::Scalar(formatted_date_scalar),
454 ColumnarValue::Scalar(format1_scalar),
455 ColumnarValue::Scalar(format2_scalar),
456 ],
457 1,
458 );
459
460 match to_date_result {
461 Ok(ColumnarValue::Scalar(ScalarValue::Date32(date_val))) => {
462 let expected = Date32Type::parse_formatted("2023-01-31", "%Y-%m-%d");
463 assert_eq!(
464 date_val, expected,
465 "to_date created wrong value for date with 2 format strings"
466 );
467 }
468 _ => panic!("Conversion failed",),
469 }
470 }
471
472 #[test]
473 fn test_to_date_from_timestamp() {
474 let test_cases = vec![
475 "2020-09-08T13:42:29Z",
476 "2020-09-08T13:42:29.190855-05:00",
477 "2020-09-08 12:13:29",
478 ];
479 for date_str in test_cases {
480 let formatted_date_scalar = ScalarValue::Utf8(Some(date_str.into()));
481
482 let to_date_result = invoke_to_date_with_args(
483 vec![ColumnarValue::Scalar(formatted_date_scalar)],
484 1,
485 );
486
487 match to_date_result {
488 Ok(ColumnarValue::Scalar(ScalarValue::Date32(date_val))) => {
489 let expected = Date32Type::parse_formatted("2020-09-08", "%Y-%m-%d");
490 assert_eq!(date_val, expected, "to_date created wrong value");
491 }
492 _ => panic!("Conversion of {date_str} failed"),
493 }
494 }
495 }
496
497 #[test]
498 fn test_to_date_string_with_valid_number() {
499 let date_str = "20241231";
500 let date_scalar = ScalarValue::Utf8(Some(date_str.into()));
501
502 let to_date_result =
503 invoke_to_date_with_args(vec![ColumnarValue::Scalar(date_scalar)], 1);
504
505 match to_date_result {
506 Ok(ColumnarValue::Scalar(ScalarValue::Date32(date_val))) => {
507 let expected = Date32Type::parse_formatted("2024-12-31", "%Y-%m-%d");
508 assert_eq!(
509 date_val, expected,
510 "to_date created wrong value for {date_str}"
511 );
512 }
513 _ => panic!("Conversion of {date_str} failed"),
514 }
515 }
516
517 #[test]
518 fn test_to_date_string_with_invalid_number() {
519 let date_str = "202412311";
520 let date_scalar = ScalarValue::Utf8(Some(date_str.into()));
521
522 let to_date_result =
523 invoke_to_date_with_args(vec![ColumnarValue::Scalar(date_scalar)], 1);
524
525 if let Ok(ColumnarValue::Scalar(ScalarValue::Date32(_))) = to_date_result {
526 panic!("Conversion of {date_str} succeeded, but should have failed. ");
527 }
528 }
529}