datafusion_functions/datetime/
to_date.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::datetime::common::*;
19use arrow::datatypes::DataType;
20use arrow::datatypes::DataType::*;
21use arrow::error::ArrowError::ParseError;
22use arrow::{array::types::Date32Type, compute::kernels::cast_utils::Parser};
23use datafusion_common::error::DataFusionError;
24use datafusion_common::{arrow_err, exec_err, internal_datafusion_err, Result};
25use datafusion_expr::{
26    ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
27};
28use datafusion_macros::user_doc;
29use std::any::Any;
30
31#[user_doc(
32    doc_section(label = "Time and Date Functions"),
33    description = r"Converts a value to a date (`YYYY-MM-DD`).
34Supports strings, integer and double types as input.
35Strings are parsed as YYYY-MM-DD (e.g. '2023-07-20') if no [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)s are provided.
36Integers and doubles are interpreted as days since the unix epoch (`1970-01-01T00:00:00Z`).
37Returns the corresponding date.
38
39Note: `to_date` returns Date32, which represents its values as the number of days since unix epoch(`1970-01-01`) stored as signed 32 bit value. The largest supported date value is `9999-12-31`.",
40    syntax_example = "to_date('2017-05-31', '%Y-%m-%d')",
41    sql_example = r#"```sql
42> select to_date('2023-01-31'); 
43+-------------------------------+
44| to_date(Utf8("2023-01-31")) |
45+-------------------------------+
46| 2023-01-31                    |
47+-------------------------------+
48> select to_date('2023/01/31', '%Y-%m-%d', '%Y/%m/%d');
49+---------------------------------------------------------------------+
50| to_date(Utf8("2023/01/31"),Utf8("%Y-%m-%d"),Utf8("%Y/%m/%d")) |
51+---------------------------------------------------------------------+
52| 2023-01-31                                                          |
53+---------------------------------------------------------------------+
54```
55
56Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/date_time_functions.rs)
57"#,
58    standard_argument(name = "expression", prefix = "String"),
59    argument(
60        name = "format_n",
61        description = r"Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order
62  they appear with the first successful one being returned. If none of the formats successfully parse the expression
63  an error will be returned."
64    )
65)]
66#[derive(Debug, PartialEq, Eq, Hash)]
67pub struct ToDateFunc {
68    signature: Signature,
69}
70
71impl Default for ToDateFunc {
72    fn default() -> Self {
73        Self::new()
74    }
75}
76
77impl ToDateFunc {
78    pub fn new() -> Self {
79        Self {
80            signature: Signature::variadic_any(Volatility::Immutable),
81        }
82    }
83
84    fn to_date(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
85        match args.len() {
86            1 => handle::<Date32Type, _, Date32Type>(
87                args,
88                |s| match Date32Type::parse(s) {
89                    Some(v) => Ok(v),
90                    None => arrow_err!(ParseError(
91                        "Unable to cast to Date32 for converting from i64 to i32 failed"
92                            .to_string()
93                    )),
94                },
95                "to_date",
96            ),
97            2.. => handle_multiple::<Date32Type, _, Date32Type, _>(
98                args,
99                |s, format| {
100                    string_to_timestamp_millis_formatted(s, format)
101                        .map(|n| n / (24 * 60 * 60 * 1_000))
102                        .and_then(|v| {
103                            v.try_into().map_err(|_| {
104                                internal_datafusion_err!("Unable to cast to Date32 for converting from i64 to i32 failed")
105                            })
106                        })
107                },
108                |n| n,
109                "to_date",
110            ),
111            0 => exec_err!("Unsupported 0 argument count for function to_date"),
112        }
113    }
114}
115
116impl ScalarUDFImpl for ToDateFunc {
117    fn as_any(&self) -> &dyn Any {
118        self
119    }
120
121    fn name(&self) -> &str {
122        "to_date"
123    }
124
125    fn signature(&self) -> &Signature {
126        &self.signature
127    }
128
129    fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
130        Ok(Date32)
131    }
132
133    fn invoke_with_args(
134        &self,
135        args: datafusion_expr::ScalarFunctionArgs,
136    ) -> Result<ColumnarValue> {
137        let args = args.args;
138        if args.is_empty() {
139            return exec_err!("to_date function requires 1 or more arguments, got 0");
140        }
141
142        // validate that any args after the first one are Utf8
143        if args.len() > 1 {
144            validate_data_types(&args, "to_date")?;
145        }
146
147        match args[0].data_type() {
148            Int32 | Int64 | Null | Float64 | Date32 | Date64 => {
149                args[0].cast_to(&Date32, None)
150            }
151            Utf8View | LargeUtf8 | Utf8 => self.to_date(&args),
152            other => {
153                exec_err!("Unsupported data type {:?} for function to_date", other)
154            }
155        }
156    }
157
158    fn documentation(&self) -> Option<&Documentation> {
159        self.doc()
160    }
161}
162
163#[cfg(test)]
164mod tests {
165    use super::ToDateFunc;
166    use arrow::array::{Array, Date32Array, GenericStringArray, StringViewArray};
167    use arrow::datatypes::{DataType, Field};
168    use arrow::{compute::kernels::cast_utils::Parser, datatypes::Date32Type};
169    use datafusion_common::config::ConfigOptions;
170    use datafusion_common::{DataFusionError, ScalarValue};
171    use datafusion_expr::{ColumnarValue, ScalarUDFImpl};
172    use std::sync::Arc;
173
174    fn invoke_to_date_with_args(
175        args: Vec<ColumnarValue>,
176        number_rows: usize,
177    ) -> Result<ColumnarValue, DataFusionError> {
178        let arg_fields = args
179            .iter()
180            .map(|arg| Field::new("a", arg.data_type(), true).into())
181            .collect::<Vec<_>>();
182
183        let args = datafusion_expr::ScalarFunctionArgs {
184            args,
185            arg_fields,
186            number_rows,
187            return_field: Field::new("f", DataType::Date32, true).into(),
188            config_options: Arc::new(ConfigOptions::default()),
189        };
190        ToDateFunc::new().invoke_with_args(args)
191    }
192
193    #[test]
194    fn test_to_date_without_format() {
195        struct TestCase {
196            name: &'static str,
197            date_str: &'static str,
198        }
199
200        let test_cases = vec![
201            TestCase {
202                name: "Largest four-digit year (9999)",
203                date_str: "9999-12-31",
204            },
205            TestCase {
206                name: "Year 1 (0001)",
207                date_str: "0001-12-31",
208            },
209            TestCase {
210                name: "Year before epoch (1969)",
211                date_str: "1969-01-01",
212            },
213            TestCase {
214                name: "Switch Julian/Gregorian calendar (1582-10-10)",
215                date_str: "1582-10-10",
216            },
217        ];
218
219        for tc in &test_cases {
220            test_scalar(ScalarValue::Utf8(Some(tc.date_str.to_string())), tc);
221            test_scalar(ScalarValue::LargeUtf8(Some(tc.date_str.to_string())), tc);
222            test_scalar(ScalarValue::Utf8View(Some(tc.date_str.to_string())), tc);
223
224            test_array::<GenericStringArray<i32>>(tc);
225            test_array::<GenericStringArray<i64>>(tc);
226            test_array::<StringViewArray>(tc);
227        }
228
229        fn test_scalar(sv: ScalarValue, tc: &TestCase) {
230            let to_date_result =
231                invoke_to_date_with_args(vec![ColumnarValue::Scalar(sv)], 1);
232
233            match to_date_result {
234                Ok(ColumnarValue::Scalar(ScalarValue::Date32(date_val))) => {
235                    let expected = Date32Type::parse_formatted(tc.date_str, "%Y-%m-%d");
236                    assert_eq!(
237                        date_val, expected,
238                        "{}: to_date created wrong value",
239                        tc.name
240                    );
241                }
242                _ => panic!("Could not convert '{}' to Date", tc.date_str),
243            }
244        }
245
246        fn test_array<A>(tc: &TestCase)
247        where
248            A: From<Vec<&'static str>> + Array + 'static,
249        {
250            let date_array = A::from(vec![tc.date_str]);
251            let batch_len = date_array.len();
252            let to_date_result = invoke_to_date_with_args(
253                vec![ColumnarValue::Array(Arc::new(date_array))],
254                batch_len,
255            );
256
257            match to_date_result {
258                Ok(ColumnarValue::Array(a)) => {
259                    assert_eq!(a.len(), 1);
260
261                    let expected = Date32Type::parse_formatted(tc.date_str, "%Y-%m-%d");
262                    let mut builder = Date32Array::builder(4);
263                    builder.append_value(expected.unwrap());
264
265                    assert_eq!(
266                        &builder.finish() as &dyn Array,
267                        a.as_ref(),
268                        "{}: to_date created wrong value",
269                        tc.name
270                    );
271                }
272                _ => panic!("Could not convert '{}' to Date", tc.date_str),
273            }
274        }
275    }
276
277    #[test]
278    fn test_to_date_with_format() {
279        struct TestCase {
280            name: &'static str,
281            date_str: &'static str,
282            format_str: &'static str,
283            formatted_date: &'static str,
284        }
285
286        let test_cases = vec![
287            TestCase {
288                name: "Largest four-digit year (9999)",
289                date_str: "9999-12-31",
290                format_str: "%Y%m%d",
291                formatted_date: "99991231",
292            },
293            TestCase {
294                name: "Smallest four-digit year (-9999)",
295                date_str: "-9999-12-31",
296                format_str: "%Y/%m/%d",
297                formatted_date: "-9999/12/31",
298            },
299            TestCase {
300                name: "Year 1 (0001)",
301                date_str: "0001-12-31",
302                format_str: "%Y%m%d",
303                formatted_date: "00011231",
304            },
305            TestCase {
306                name: "Year before epoch (1969)",
307                date_str: "1969-01-01",
308                format_str: "%Y%m%d",
309                formatted_date: "19690101",
310            },
311            TestCase {
312                name: "Switch Julian/Gregorian calendar (1582-10-10)",
313                date_str: "1582-10-10",
314                format_str: "%Y%m%d",
315                formatted_date: "15821010",
316            },
317            TestCase {
318                name: "Negative Year, BC (-42-01-01)",
319                date_str: "-42-01-01",
320                format_str: "%Y/%m/%d",
321                formatted_date: "-42/01/01",
322            },
323        ];
324
325        for tc in &test_cases {
326            test_scalar(ScalarValue::Utf8(Some(tc.formatted_date.to_string())), tc);
327            test_scalar(
328                ScalarValue::LargeUtf8(Some(tc.formatted_date.to_string())),
329                tc,
330            );
331            test_scalar(
332                ScalarValue::Utf8View(Some(tc.formatted_date.to_string())),
333                tc,
334            );
335
336            test_array::<GenericStringArray<i32>>(tc);
337            test_array::<GenericStringArray<i64>>(tc);
338            test_array::<StringViewArray>(tc);
339        }
340
341        fn test_scalar(sv: ScalarValue, tc: &TestCase) {
342            let format_scalar = ScalarValue::Utf8(Some(tc.format_str.to_string()));
343
344            let to_date_result = invoke_to_date_with_args(
345                vec![
346                    ColumnarValue::Scalar(sv),
347                    ColumnarValue::Scalar(format_scalar),
348                ],
349                1,
350            );
351
352            match to_date_result {
353                Ok(ColumnarValue::Scalar(ScalarValue::Date32(date_val))) => {
354                    let expected = Date32Type::parse_formatted(tc.date_str, "%Y-%m-%d");
355                    assert_eq!(date_val, expected, "{}: to_date created wrong value for date '{}' with format string '{}'", tc.name, tc.formatted_date, tc.format_str);
356                }
357                _ => panic!(
358                    "Could not convert '{}' with format string '{}'to Date",
359                    tc.date_str, tc.format_str
360                ),
361            }
362        }
363
364        fn test_array<A>(tc: &TestCase)
365        where
366            A: From<Vec<&'static str>> + Array + 'static,
367        {
368            let date_array = A::from(vec![tc.formatted_date]);
369            let format_array = A::from(vec![tc.format_str]);
370            let batch_len = date_array.len();
371
372            let to_date_result = invoke_to_date_with_args(
373                vec![
374                    ColumnarValue::Array(Arc::new(date_array)),
375                    ColumnarValue::Array(Arc::new(format_array)),
376                ],
377                batch_len,
378            );
379
380            match to_date_result {
381                Ok(ColumnarValue::Array(a)) => {
382                    assert_eq!(a.len(), 1);
383
384                    let expected = Date32Type::parse_formatted(tc.date_str, "%Y-%m-%d");
385                    let mut builder = Date32Array::builder(4);
386                    builder.append_value(expected.unwrap());
387
388                    assert_eq!(
389                        &builder.finish() as &dyn Array, a.as_ref(),
390                        "{}: to_date created wrong value for date '{}' with format string '{}'",
391                        tc.name,
392                        tc.formatted_date,
393                        tc.format_str
394                    );
395                }
396                _ => panic!(
397                    "Could not convert '{}' with format string '{}'to Date: {:?}",
398                    tc.formatted_date, tc.format_str, to_date_result
399                ),
400            }
401        }
402    }
403
404    #[test]
405    fn test_to_date_multiple_format_strings() {
406        let formatted_date_scalar = ScalarValue::Utf8(Some("2023/01/31".into()));
407        let format1_scalar = ScalarValue::Utf8(Some("%Y-%m-%d".into()));
408        let format2_scalar = ScalarValue::Utf8(Some("%Y/%m/%d".into()));
409
410        let to_date_result = invoke_to_date_with_args(
411            vec![
412                ColumnarValue::Scalar(formatted_date_scalar),
413                ColumnarValue::Scalar(format1_scalar),
414                ColumnarValue::Scalar(format2_scalar),
415            ],
416            1,
417        );
418
419        match to_date_result {
420            Ok(ColumnarValue::Scalar(ScalarValue::Date32(date_val))) => {
421                let expected = Date32Type::parse_formatted("2023-01-31", "%Y-%m-%d");
422                assert_eq!(
423                    date_val, expected,
424                    "to_date created wrong value for date with 2 format strings"
425                );
426            }
427            _ => panic!("Conversion failed",),
428        }
429    }
430
431    #[test]
432    fn test_to_date_from_timestamp() {
433        let test_cases = vec![
434            "2020-09-08T13:42:29Z",
435            "2020-09-08T13:42:29.190855-05:00",
436            "2020-09-08 12:13:29",
437        ];
438        for date_str in test_cases {
439            let formatted_date_scalar = ScalarValue::Utf8(Some(date_str.into()));
440
441            let to_date_result = invoke_to_date_with_args(
442                vec![ColumnarValue::Scalar(formatted_date_scalar)],
443                1,
444            );
445
446            match to_date_result {
447                Ok(ColumnarValue::Scalar(ScalarValue::Date32(date_val))) => {
448                    let expected = Date32Type::parse_formatted("2020-09-08", "%Y-%m-%d");
449                    assert_eq!(date_val, expected, "to_date created wrong value");
450                }
451                _ => panic!("Conversion of {date_str} failed"),
452            }
453        }
454    }
455
456    #[test]
457    fn test_to_date_string_with_valid_number() {
458        let date_str = "20241231";
459        let date_scalar = ScalarValue::Utf8(Some(date_str.into()));
460
461        let to_date_result =
462            invoke_to_date_with_args(vec![ColumnarValue::Scalar(date_scalar)], 1);
463
464        match to_date_result {
465            Ok(ColumnarValue::Scalar(ScalarValue::Date32(date_val))) => {
466                let expected = Date32Type::parse_formatted("2024-12-31", "%Y-%m-%d");
467                assert_eq!(
468                    date_val, expected,
469                    "to_date created wrong value for {date_str}"
470                );
471            }
472            _ => panic!("Conversion of {date_str} failed"),
473        }
474    }
475
476    #[test]
477    fn test_to_date_string_with_invalid_number() {
478        let date_str = "202412311";
479        let date_scalar = ScalarValue::Utf8(Some(date_str.into()));
480
481        let to_date_result =
482            invoke_to_date_with_args(vec![ColumnarValue::Scalar(date_scalar)], 1);
483
484        if let Ok(ColumnarValue::Scalar(ScalarValue::Date32(_))) = to_date_result {
485            panic!("Conversion of {date_str} succeeded, but should have failed. ");
486        }
487    }
488}