datafusion_functions/datetime/
to_date.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::datetime::common::*;
19use arrow::compute::cast_with_options;
20use arrow::datatypes::DataType;
21use arrow::datatypes::DataType::*;
22use arrow::error::ArrowError::ParseError;
23use arrow::{array::types::Date32Type, compute::kernels::cast_utils::Parser};
24use datafusion_common::format::DEFAULT_CAST_OPTIONS;
25use datafusion_common::{Result, arrow_err, exec_err, internal_datafusion_err};
26use datafusion_expr::{
27    ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
28};
29use datafusion_macros::user_doc;
30use std::any::Any;
31
32#[user_doc(
33    doc_section(label = "Time and Date Functions"),
34    description = r"Converts a value to a date (`YYYY-MM-DD`).
35Supports strings, numeric and timestamp types as input.
36Strings are parsed as YYYY-MM-DD (e.g. '2023-07-20') if no [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)s are provided.
37Integers and doubles are interpreted as days since the unix epoch (`1970-01-01T00:00:00Z`).
38Returns the corresponding date.
39
40Note: `to_date` returns Date32, which represents its values as the number of days since unix epoch(`1970-01-01`) stored as signed 32 bit value. The largest supported date value is `9999-12-31`.",
41    syntax_example = "to_date('2017-05-31', '%Y-%m-%d')",
42    sql_example = r#"```sql
43> select to_date('2023-01-31');
44+-------------------------------+
45| to_date(Utf8("2023-01-31")) |
46+-------------------------------+
47| 2023-01-31                    |
48+-------------------------------+
49> select to_date('2023/01/31', '%Y-%m-%d', '%Y/%m/%d');
50+---------------------------------------------------------------------+
51| to_date(Utf8("2023/01/31"),Utf8("%Y-%m-%d"),Utf8("%Y/%m/%d")) |
52+---------------------------------------------------------------------+
53| 2023-01-31                                                          |
54+---------------------------------------------------------------------+
55```
56
57Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/builtin_functions/date_time.rs)
58"#,
59    standard_argument(name = "expression", prefix = "String"),
60    argument(
61        name = "format_n",
62        description = r"Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order
63  they appear with the first successful one being returned. If none of the formats successfully parse the expression
64  an error will be returned."
65    )
66)]
67#[derive(Debug, PartialEq, Eq, Hash)]
68pub struct ToDateFunc {
69    signature: Signature,
70}
71
72impl Default for ToDateFunc {
73    fn default() -> Self {
74        Self::new()
75    }
76}
77
78impl ToDateFunc {
79    pub fn new() -> Self {
80        Self {
81            signature: Signature::variadic_any(Volatility::Immutable),
82        }
83    }
84
85    fn to_date(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
86        match args.len() {
87            1 => handle::<Date32Type, _>(
88                args,
89                |s| match Date32Type::parse(s) {
90                    Some(v) => Ok(v),
91                    None => arrow_err!(ParseError(
92                        "Unable to cast to Date32 for converting from i64 to i32 failed"
93                            .to_string()
94                    )),
95                },
96                "to_date",
97                &Date32,
98            ),
99            2.. => handle_multiple::<Date32Type, _, _>(
100                args,
101                |s, format| {
102                    string_to_timestamp_millis_formatted(s, format)
103                        .map(|n| n / (24 * 60 * 60 * 1_000))
104                        .and_then(|v| {
105                            v.try_into().map_err(|_| {
106                                internal_datafusion_err!("Unable to cast to Date32 for converting from i64 to i32 failed")
107                            })
108                        })
109                },
110                |n| n,
111                "to_date",
112                &Date32,
113            ),
114            0 => exec_err!("Unsupported 0 argument count for function to_date"),
115        }
116    }
117}
118
119impl ScalarUDFImpl for ToDateFunc {
120    fn as_any(&self) -> &dyn Any {
121        self
122    }
123
124    fn name(&self) -> &str {
125        "to_date"
126    }
127
128    fn signature(&self) -> &Signature {
129        &self.signature
130    }
131
132    fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
133        Ok(Date32)
134    }
135
136    fn invoke_with_args(
137        &self,
138        args: datafusion_expr::ScalarFunctionArgs,
139    ) -> Result<ColumnarValue> {
140        let args = args.args;
141        if args.is_empty() {
142            return exec_err!("to_date function requires 1 or more arguments, got 0");
143        }
144
145        // validate that any args after the first one are Utf8
146        if args.len() > 1 {
147            validate_data_types(&args, "to_date")?;
148        }
149
150        match args[0].data_type() {
151            Null | Int32 | Int64 | Date32 | Date64 | Timestamp(_, _) => {
152                args[0].cast_to(&Date32, None)
153            }
154            UInt8 | UInt16 | UInt32 | UInt64 | Int8 | Int16 => {
155                // Arrow cast doesn't support direct casting of these types to date32
156                // as it only supports Int32 and Int64. To work around that limitation,
157                // use cast_with_options to cast to Int32 and then cast the result of
158                // that to Date32.
159                match &args[0] {
160                    ColumnarValue::Array(array) => {
161                        Ok(ColumnarValue::Array(cast_with_options(
162                            &cast_with_options(&array, &Int32, &DEFAULT_CAST_OPTIONS)?,
163                            &Date32,
164                            &DEFAULT_CAST_OPTIONS,
165                        )?))
166                    }
167                    ColumnarValue::Scalar(scalar) => {
168                        let sv =
169                            scalar.cast_to_with_options(&Int32, &DEFAULT_CAST_OPTIONS)?;
170                        Ok(ColumnarValue::Scalar(
171                            sv.cast_to_with_options(&Date32, &DEFAULT_CAST_OPTIONS)?,
172                        ))
173                    }
174                }
175            }
176            Float16
177            | Float32
178            | Float64
179            | Decimal32(_, _)
180            | Decimal64(_, _)
181            | Decimal128(_, _)
182            | Decimal256(_, _) => {
183                // The only way this makes sense is to get the Int64 value of the float
184                // or decimal and then cast that to Date32.
185                args[0].cast_to(&Int64, None)?.cast_to(&Date32, None)
186            }
187            Utf8View | LargeUtf8 | Utf8 => self.to_date(&args),
188            other => {
189                exec_err!("Unsupported data type {} for function to_date", other)
190            }
191        }
192    }
193
194    fn documentation(&self) -> Option<&Documentation> {
195        self.doc()
196    }
197}
198
199#[cfg(test)]
200mod tests {
201    use super::ToDateFunc;
202    use arrow::array::{Array, Date32Array, GenericStringArray, StringViewArray};
203    use arrow::datatypes::{DataType, Field};
204    use arrow::{compute::kernels::cast_utils::Parser, datatypes::Date32Type};
205    use datafusion_common::config::ConfigOptions;
206    use datafusion_common::{DataFusionError, ScalarValue};
207    use datafusion_expr::{ColumnarValue, ScalarUDFImpl};
208    use std::sync::Arc;
209
210    fn invoke_to_date_with_args(
211        args: Vec<ColumnarValue>,
212        number_rows: usize,
213    ) -> Result<ColumnarValue, DataFusionError> {
214        let arg_fields = args
215            .iter()
216            .map(|arg| Field::new("a", arg.data_type(), true).into())
217            .collect::<Vec<_>>();
218
219        let args = datafusion_expr::ScalarFunctionArgs {
220            args,
221            arg_fields,
222            number_rows,
223            return_field: Field::new("f", DataType::Date32, true).into(),
224            config_options: Arc::new(ConfigOptions::default()),
225        };
226        ToDateFunc::new().invoke_with_args(args)
227    }
228
229    #[test]
230    fn test_to_date_without_format() {
231        struct TestCase {
232            name: &'static str,
233            date_str: &'static str,
234        }
235
236        let test_cases = vec![
237            TestCase {
238                name: "Largest four-digit year (9999)",
239                date_str: "9999-12-31",
240            },
241            TestCase {
242                name: "Year 1 (0001)",
243                date_str: "0001-12-31",
244            },
245            TestCase {
246                name: "Year before epoch (1969)",
247                date_str: "1969-01-01",
248            },
249            TestCase {
250                name: "Switch Julian/Gregorian calendar (1582-10-10)",
251                date_str: "1582-10-10",
252            },
253        ];
254
255        for tc in &test_cases {
256            test_scalar(ScalarValue::Utf8(Some(tc.date_str.to_string())), tc);
257            test_scalar(ScalarValue::LargeUtf8(Some(tc.date_str.to_string())), tc);
258            test_scalar(ScalarValue::Utf8View(Some(tc.date_str.to_string())), tc);
259
260            test_array::<GenericStringArray<i32>>(tc);
261            test_array::<GenericStringArray<i64>>(tc);
262            test_array::<StringViewArray>(tc);
263        }
264
265        fn test_scalar(sv: ScalarValue, tc: &TestCase) {
266            let to_date_result =
267                invoke_to_date_with_args(vec![ColumnarValue::Scalar(sv)], 1);
268
269            match to_date_result {
270                Ok(ColumnarValue::Scalar(ScalarValue::Date32(date_val))) => {
271                    let expected = Date32Type::parse_formatted(tc.date_str, "%Y-%m-%d");
272                    assert_eq!(
273                        date_val, expected,
274                        "{}: to_date created wrong value",
275                        tc.name
276                    );
277                }
278                _ => panic!("Could not convert '{}' to Date", tc.date_str),
279            }
280        }
281
282        fn test_array<A>(tc: &TestCase)
283        where
284            A: From<Vec<&'static str>> + Array + 'static,
285        {
286            let date_array = A::from(vec![tc.date_str]);
287            let batch_len = date_array.len();
288            let to_date_result = invoke_to_date_with_args(
289                vec![ColumnarValue::Array(Arc::new(date_array))],
290                batch_len,
291            );
292
293            match to_date_result {
294                Ok(ColumnarValue::Array(a)) => {
295                    assert_eq!(a.len(), 1);
296
297                    let expected = Date32Type::parse_formatted(tc.date_str, "%Y-%m-%d");
298                    let mut builder = Date32Array::builder(4);
299                    builder.append_value(expected.unwrap());
300
301                    assert_eq!(
302                        &builder.finish() as &dyn Array,
303                        a.as_ref(),
304                        "{}: to_date created wrong value",
305                        tc.name
306                    );
307                }
308                _ => panic!("Could not convert '{}' to Date", tc.date_str),
309            }
310        }
311    }
312
313    #[test]
314    fn test_to_date_with_format() {
315        struct TestCase {
316            name: &'static str,
317            date_str: &'static str,
318            format_str: &'static str,
319            formatted_date: &'static str,
320        }
321
322        let test_cases = vec![
323            TestCase {
324                name: "Largest four-digit year (9999)",
325                date_str: "9999-12-31",
326                format_str: "%Y%m%d",
327                formatted_date: "99991231",
328            },
329            TestCase {
330                name: "Smallest four-digit year (-9999)",
331                date_str: "-9999-12-31",
332                format_str: "%Y/%m/%d",
333                formatted_date: "-9999/12/31",
334            },
335            TestCase {
336                name: "Year 1 (0001)",
337                date_str: "0001-12-31",
338                format_str: "%Y%m%d",
339                formatted_date: "00011231",
340            },
341            TestCase {
342                name: "Year before epoch (1969)",
343                date_str: "1969-01-01",
344                format_str: "%Y%m%d",
345                formatted_date: "19690101",
346            },
347            TestCase {
348                name: "Switch Julian/Gregorian calendar (1582-10-10)",
349                date_str: "1582-10-10",
350                format_str: "%Y%m%d",
351                formatted_date: "15821010",
352            },
353            TestCase {
354                name: "Negative Year, BC (-42-01-01)",
355                date_str: "-42-01-01",
356                format_str: "%Y/%m/%d",
357                formatted_date: "-42/01/01",
358            },
359        ];
360
361        for tc in &test_cases {
362            test_scalar(ScalarValue::Utf8(Some(tc.formatted_date.to_string())), tc);
363            test_scalar(
364                ScalarValue::LargeUtf8(Some(tc.formatted_date.to_string())),
365                tc,
366            );
367            test_scalar(
368                ScalarValue::Utf8View(Some(tc.formatted_date.to_string())),
369                tc,
370            );
371
372            test_array::<GenericStringArray<i32>>(tc);
373            test_array::<GenericStringArray<i64>>(tc);
374            test_array::<StringViewArray>(tc);
375        }
376
377        fn test_scalar(sv: ScalarValue, tc: &TestCase) {
378            let format_scalar = ScalarValue::Utf8(Some(tc.format_str.to_string()));
379
380            let to_date_result = invoke_to_date_with_args(
381                vec![
382                    ColumnarValue::Scalar(sv),
383                    ColumnarValue::Scalar(format_scalar),
384                ],
385                1,
386            );
387
388            match to_date_result {
389                Ok(ColumnarValue::Scalar(ScalarValue::Date32(date_val))) => {
390                    let expected = Date32Type::parse_formatted(tc.date_str, "%Y-%m-%d");
391                    assert_eq!(
392                        date_val, expected,
393                        "{}: to_date created wrong value for date '{}' with format string '{}'",
394                        tc.name, tc.formatted_date, tc.format_str
395                    );
396                }
397                _ => panic!(
398                    "Could not convert '{}' with format string '{}'to Date",
399                    tc.date_str, tc.format_str
400                ),
401            }
402        }
403
404        fn test_array<A>(tc: &TestCase)
405        where
406            A: From<Vec<&'static str>> + Array + 'static,
407        {
408            let date_array = A::from(vec![tc.formatted_date]);
409            let format_array = A::from(vec![tc.format_str]);
410            let batch_len = date_array.len();
411
412            let to_date_result = invoke_to_date_with_args(
413                vec![
414                    ColumnarValue::Array(Arc::new(date_array)),
415                    ColumnarValue::Array(Arc::new(format_array)),
416                ],
417                batch_len,
418            );
419
420            match to_date_result {
421                Ok(ColumnarValue::Array(a)) => {
422                    assert_eq!(a.len(), 1);
423
424                    let expected = Date32Type::parse_formatted(tc.date_str, "%Y-%m-%d");
425                    let mut builder = Date32Array::builder(4);
426                    builder.append_value(expected.unwrap());
427
428                    assert_eq!(
429                        &builder.finish() as &dyn Array,
430                        a.as_ref(),
431                        "{}: to_date created wrong value for date '{}' with format string '{}'",
432                        tc.name,
433                        tc.formatted_date,
434                        tc.format_str
435                    );
436                }
437                _ => panic!(
438                    "Could not convert '{}' with format string '{}'to Date: {:?}",
439                    tc.formatted_date, tc.format_str, to_date_result
440                ),
441            }
442        }
443    }
444
445    #[test]
446    fn test_to_date_multiple_format_strings() {
447        let formatted_date_scalar = ScalarValue::Utf8(Some("2023/01/31".into()));
448        let format1_scalar = ScalarValue::Utf8(Some("%Y-%m-%d".into()));
449        let format2_scalar = ScalarValue::Utf8(Some("%Y/%m/%d".into()));
450
451        let to_date_result = invoke_to_date_with_args(
452            vec![
453                ColumnarValue::Scalar(formatted_date_scalar),
454                ColumnarValue::Scalar(format1_scalar),
455                ColumnarValue::Scalar(format2_scalar),
456            ],
457            1,
458        );
459
460        match to_date_result {
461            Ok(ColumnarValue::Scalar(ScalarValue::Date32(date_val))) => {
462                let expected = Date32Type::parse_formatted("2023-01-31", "%Y-%m-%d");
463                assert_eq!(
464                    date_val, expected,
465                    "to_date created wrong value for date with 2 format strings"
466                );
467            }
468            _ => panic!("Conversion failed",),
469        }
470    }
471
472    #[test]
473    fn test_to_date_from_timestamp() {
474        let test_cases = vec![
475            "2020-09-08T13:42:29Z",
476            "2020-09-08T13:42:29.190855-05:00",
477            "2020-09-08 12:13:29",
478        ];
479        for date_str in test_cases {
480            let formatted_date_scalar = ScalarValue::Utf8(Some(date_str.into()));
481
482            let to_date_result = invoke_to_date_with_args(
483                vec![ColumnarValue::Scalar(formatted_date_scalar)],
484                1,
485            );
486
487            match to_date_result {
488                Ok(ColumnarValue::Scalar(ScalarValue::Date32(date_val))) => {
489                    let expected = Date32Type::parse_formatted("2020-09-08", "%Y-%m-%d");
490                    assert_eq!(date_val, expected, "to_date created wrong value");
491                }
492                _ => panic!("Conversion of {date_str} failed"),
493            }
494        }
495    }
496
497    #[test]
498    fn test_to_date_string_with_valid_number() {
499        let date_str = "20241231";
500        let date_scalar = ScalarValue::Utf8(Some(date_str.into()));
501
502        let to_date_result =
503            invoke_to_date_with_args(vec![ColumnarValue::Scalar(date_scalar)], 1);
504
505        match to_date_result {
506            Ok(ColumnarValue::Scalar(ScalarValue::Date32(date_val))) => {
507                let expected = Date32Type::parse_formatted("2024-12-31", "%Y-%m-%d");
508                assert_eq!(
509                    date_val, expected,
510                    "to_date created wrong value for {date_str}"
511                );
512            }
513            _ => panic!("Conversion of {date_str} failed"),
514        }
515    }
516
517    #[test]
518    fn test_to_date_string_with_invalid_number() {
519        let date_str = "202412311";
520        let date_scalar = ScalarValue::Utf8(Some(date_str.into()));
521
522        let to_date_result =
523            invoke_to_date_with_args(vec![ColumnarValue::Scalar(date_scalar)], 1);
524
525        if let Ok(ColumnarValue::Scalar(ScalarValue::Date32(_))) = to_date_result {
526            panic!("Conversion of {date_str} succeeded, but should have failed. ");
527        }
528    }
529}