datafusion_functions/datetime/
to_date.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::datetime::common::*;
19use arrow::datatypes::DataType;
20use arrow::datatypes::DataType::*;
21use arrow::error::ArrowError::ParseError;
22use arrow::{array::types::Date32Type, compute::kernels::cast_utils::Parser};
23use datafusion_common::error::DataFusionError;
24use datafusion_common::{arrow_err, exec_err, internal_datafusion_err, Result};
25use datafusion_expr::{
26    ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
27};
28use datafusion_macros::user_doc;
29use std::any::Any;
30
31#[user_doc(
32    doc_section(label = "Time and Date Functions"),
33    description = r"Converts a value to a date (`YYYY-MM-DD`).
34Supports strings, integer and double types as input.
35Strings are parsed as YYYY-MM-DD (e.g. '2023-07-20') if no [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)s are provided.
36Integers and doubles are interpreted as days since the unix epoch (`1970-01-01T00:00:00Z`).
37Returns the corresponding date.
38
39Note: `to_date` returns Date32, which represents its values as the number of days since unix epoch(`1970-01-01`) stored as signed 32 bit value. The largest supported date value is `9999-12-31`.",
40    syntax_example = "to_date('2017-05-31', '%Y-%m-%d')",
41    sql_example = r#"```sql
42> select to_date('2023-01-31'); 
43+-------------------------------+
44| to_date(Utf8("2023-01-31")) |
45+-------------------------------+
46| 2023-01-31                    |
47+-------------------------------+
48> select to_date('2023/01/31', '%Y-%m-%d', '%Y/%m/%d');
49+---------------------------------------------------------------------+
50| to_date(Utf8("2023/01/31"),Utf8("%Y-%m-%d"),Utf8("%Y/%m/%d")) |
51+---------------------------------------------------------------------+
52| 2023-01-31                                                          |
53+---------------------------------------------------------------------+
54```
55
56Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_date.rs)
57"#,
58    standard_argument(name = "expression", prefix = "String"),
59    argument(
60        name = "format_n",
61        description = r"Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order
62  they appear with the first successful one being returned. If none of the formats successfully parse the expression
63  an error will be returned."
64    )
65)]
66#[derive(Debug)]
67pub struct ToDateFunc {
68    signature: Signature,
69}
70
71impl Default for ToDateFunc {
72    fn default() -> Self {
73        Self::new()
74    }
75}
76
77impl ToDateFunc {
78    pub fn new() -> Self {
79        Self {
80            signature: Signature::variadic_any(Volatility::Immutable),
81        }
82    }
83
84    fn to_date(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
85        match args.len() {
86            1 => handle::<Date32Type, _, Date32Type>(
87                args,
88                |s| match Date32Type::parse(s) {
89                    Some(v) => Ok(v),
90                    None => arrow_err!(ParseError(
91                        "Unable to cast to Date32 for converting from i64 to i32 failed"
92                            .to_string()
93                    )),
94                },
95                "to_date",
96            ),
97            2.. => handle_multiple::<Date32Type, _, Date32Type, _>(
98                args,
99                |s, format| {
100                    string_to_timestamp_millis_formatted(s, format)
101                        .map(|n| n / (24 * 60 * 60 * 1_000))
102                        .and_then(|v| {
103                            v.try_into().map_err(|_| {
104                                internal_datafusion_err!("Unable to cast to Date32 for converting from i64 to i32 failed")
105                            })
106                        })
107                },
108                |n| n,
109                "to_date",
110            ),
111            0 => exec_err!("Unsupported 0 argument count for function to_date"),
112        }
113    }
114}
115
116impl ScalarUDFImpl for ToDateFunc {
117    fn as_any(&self) -> &dyn Any {
118        self
119    }
120
121    fn name(&self) -> &str {
122        "to_date"
123    }
124
125    fn signature(&self) -> &Signature {
126        &self.signature
127    }
128
129    fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
130        Ok(Date32)
131    }
132
133    fn invoke_with_args(
134        &self,
135        args: datafusion_expr::ScalarFunctionArgs,
136    ) -> Result<ColumnarValue> {
137        let args = args.args;
138        if args.is_empty() {
139            return exec_err!("to_date function requires 1 or more arguments, got 0");
140        }
141
142        // validate that any args after the first one are Utf8
143        if args.len() > 1 {
144            validate_data_types(&args, "to_date")?;
145        }
146
147        match args[0].data_type() {
148            Int32 | Int64 | Null | Float64 | Date32 | Date64 => {
149                args[0].cast_to(&Date32, None)
150            }
151            Utf8View | LargeUtf8 | Utf8 => self.to_date(&args),
152            other => {
153                exec_err!("Unsupported data type {:?} for function to_date", other)
154            }
155        }
156    }
157
158    fn documentation(&self) -> Option<&Documentation> {
159        self.doc()
160    }
161}
162
163#[cfg(test)]
164mod tests {
165    use arrow::array::{Array, Date32Array, GenericStringArray, StringViewArray};
166    use arrow::datatypes::{DataType, Field};
167    use arrow::{compute::kernels::cast_utils::Parser, datatypes::Date32Type};
168    use datafusion_common::{DataFusionError, ScalarValue};
169    use datafusion_expr::{ColumnarValue, ScalarUDFImpl};
170    use std::sync::Arc;
171
172    use super::ToDateFunc;
173
174    fn invoke_to_date_with_args(
175        args: Vec<ColumnarValue>,
176        number_rows: usize,
177    ) -> Result<ColumnarValue, DataFusionError> {
178        let arg_fields = args
179            .iter()
180            .map(|arg| Field::new("a", arg.data_type(), true).into())
181            .collect::<Vec<_>>();
182
183        let args = datafusion_expr::ScalarFunctionArgs {
184            args,
185            arg_fields,
186            number_rows,
187            return_field: Field::new("f", DataType::Date32, true).into(),
188        };
189        ToDateFunc::new().invoke_with_args(args)
190    }
191
192    #[test]
193    fn test_to_date_without_format() {
194        struct TestCase {
195            name: &'static str,
196            date_str: &'static str,
197        }
198
199        let test_cases = vec![
200            TestCase {
201                name: "Largest four-digit year (9999)",
202                date_str: "9999-12-31",
203            },
204            TestCase {
205                name: "Year 1 (0001)",
206                date_str: "0001-12-31",
207            },
208            TestCase {
209                name: "Year before epoch (1969)",
210                date_str: "1969-01-01",
211            },
212            TestCase {
213                name: "Switch Julian/Gregorian calendar (1582-10-10)",
214                date_str: "1582-10-10",
215            },
216        ];
217
218        for tc in &test_cases {
219            test_scalar(ScalarValue::Utf8(Some(tc.date_str.to_string())), tc);
220            test_scalar(ScalarValue::LargeUtf8(Some(tc.date_str.to_string())), tc);
221            test_scalar(ScalarValue::Utf8View(Some(tc.date_str.to_string())), tc);
222
223            test_array::<GenericStringArray<i32>>(tc);
224            test_array::<GenericStringArray<i64>>(tc);
225            test_array::<StringViewArray>(tc);
226        }
227
228        fn test_scalar(sv: ScalarValue, tc: &TestCase) {
229            let to_date_result =
230                invoke_to_date_with_args(vec![ColumnarValue::Scalar(sv)], 1);
231
232            match to_date_result {
233                Ok(ColumnarValue::Scalar(ScalarValue::Date32(date_val))) => {
234                    let expected = Date32Type::parse_formatted(tc.date_str, "%Y-%m-%d");
235                    assert_eq!(
236                        date_val, expected,
237                        "{}: to_date created wrong value",
238                        tc.name
239                    );
240                }
241                _ => panic!("Could not convert '{}' to Date", tc.date_str),
242            }
243        }
244
245        fn test_array<A>(tc: &TestCase)
246        where
247            A: From<Vec<&'static str>> + Array + 'static,
248        {
249            let date_array = A::from(vec![tc.date_str]);
250            let batch_len = date_array.len();
251            let to_date_result = invoke_to_date_with_args(
252                vec![ColumnarValue::Array(Arc::new(date_array))],
253                batch_len,
254            );
255
256            match to_date_result {
257                Ok(ColumnarValue::Array(a)) => {
258                    assert_eq!(a.len(), 1);
259
260                    let expected = Date32Type::parse_formatted(tc.date_str, "%Y-%m-%d");
261                    let mut builder = Date32Array::builder(4);
262                    builder.append_value(expected.unwrap());
263
264                    assert_eq!(
265                        &builder.finish() as &dyn Array,
266                        a.as_ref(),
267                        "{}: to_date created wrong value",
268                        tc.name
269                    );
270                }
271                _ => panic!("Could not convert '{}' to Date", tc.date_str),
272            }
273        }
274    }
275
276    #[test]
277    fn test_to_date_with_format() {
278        struct TestCase {
279            name: &'static str,
280            date_str: &'static str,
281            format_str: &'static str,
282            formatted_date: &'static str,
283        }
284
285        let test_cases = vec![
286            TestCase {
287                name: "Largest four-digit year (9999)",
288                date_str: "9999-12-31",
289                format_str: "%Y%m%d",
290                formatted_date: "99991231",
291            },
292            TestCase {
293                name: "Smallest four-digit year (-9999)",
294                date_str: "-9999-12-31",
295                format_str: "%Y/%m/%d",
296                formatted_date: "-9999/12/31",
297            },
298            TestCase {
299                name: "Year 1 (0001)",
300                date_str: "0001-12-31",
301                format_str: "%Y%m%d",
302                formatted_date: "00011231",
303            },
304            TestCase {
305                name: "Year before epoch (1969)",
306                date_str: "1969-01-01",
307                format_str: "%Y%m%d",
308                formatted_date: "19690101",
309            },
310            TestCase {
311                name: "Switch Julian/Gregorian calendar (1582-10-10)",
312                date_str: "1582-10-10",
313                format_str: "%Y%m%d",
314                formatted_date: "15821010",
315            },
316            TestCase {
317                name: "Negative Year, BC (-42-01-01)",
318                date_str: "-42-01-01",
319                format_str: "%Y/%m/%d",
320                formatted_date: "-42/01/01",
321            },
322        ];
323
324        for tc in &test_cases {
325            test_scalar(ScalarValue::Utf8(Some(tc.formatted_date.to_string())), tc);
326            test_scalar(
327                ScalarValue::LargeUtf8(Some(tc.formatted_date.to_string())),
328                tc,
329            );
330            test_scalar(
331                ScalarValue::Utf8View(Some(tc.formatted_date.to_string())),
332                tc,
333            );
334
335            test_array::<GenericStringArray<i32>>(tc);
336            test_array::<GenericStringArray<i64>>(tc);
337            test_array::<StringViewArray>(tc);
338        }
339
340        fn test_scalar(sv: ScalarValue, tc: &TestCase) {
341            let format_scalar = ScalarValue::Utf8(Some(tc.format_str.to_string()));
342
343            let to_date_result = invoke_to_date_with_args(
344                vec![
345                    ColumnarValue::Scalar(sv),
346                    ColumnarValue::Scalar(format_scalar),
347                ],
348                1,
349            );
350
351            match to_date_result {
352                Ok(ColumnarValue::Scalar(ScalarValue::Date32(date_val))) => {
353                    let expected = Date32Type::parse_formatted(tc.date_str, "%Y-%m-%d");
354                    assert_eq!(date_val, expected, "{}: to_date created wrong value for date '{}' with format string '{}'", tc.name, tc.formatted_date, tc.format_str);
355                }
356                _ => panic!(
357                    "Could not convert '{}' with format string '{}'to Date",
358                    tc.date_str, tc.format_str
359                ),
360            }
361        }
362
363        fn test_array<A>(tc: &TestCase)
364        where
365            A: From<Vec<&'static str>> + Array + 'static,
366        {
367            let date_array = A::from(vec![tc.formatted_date]);
368            let format_array = A::from(vec![tc.format_str]);
369            let batch_len = date_array.len();
370
371            let to_date_result = invoke_to_date_with_args(
372                vec![
373                    ColumnarValue::Array(Arc::new(date_array)),
374                    ColumnarValue::Array(Arc::new(format_array)),
375                ],
376                batch_len,
377            );
378
379            match to_date_result {
380                Ok(ColumnarValue::Array(a)) => {
381                    assert_eq!(a.len(), 1);
382
383                    let expected = Date32Type::parse_formatted(tc.date_str, "%Y-%m-%d");
384                    let mut builder = Date32Array::builder(4);
385                    builder.append_value(expected.unwrap());
386
387                    assert_eq!(
388                        &builder.finish() as &dyn Array, a.as_ref(),
389                        "{}: to_date created wrong value for date '{}' with format string '{}'",
390                        tc.name,
391                        tc.formatted_date,
392                        tc.format_str
393                    );
394                }
395                _ => panic!(
396                    "Could not convert '{}' with format string '{}'to Date: {:?}",
397                    tc.formatted_date, tc.format_str, to_date_result
398                ),
399            }
400        }
401    }
402
403    #[test]
404    fn test_to_date_multiple_format_strings() {
405        let formatted_date_scalar = ScalarValue::Utf8(Some("2023/01/31".into()));
406        let format1_scalar = ScalarValue::Utf8(Some("%Y-%m-%d".into()));
407        let format2_scalar = ScalarValue::Utf8(Some("%Y/%m/%d".into()));
408
409        let to_date_result = invoke_to_date_with_args(
410            vec![
411                ColumnarValue::Scalar(formatted_date_scalar),
412                ColumnarValue::Scalar(format1_scalar),
413                ColumnarValue::Scalar(format2_scalar),
414            ],
415            1,
416        );
417
418        match to_date_result {
419            Ok(ColumnarValue::Scalar(ScalarValue::Date32(date_val))) => {
420                let expected = Date32Type::parse_formatted("2023-01-31", "%Y-%m-%d");
421                assert_eq!(
422                    date_val, expected,
423                    "to_date created wrong value for date with 2 format strings"
424                );
425            }
426            _ => panic!("Conversion failed",),
427        }
428    }
429
430    #[test]
431    fn test_to_date_from_timestamp() {
432        let test_cases = vec![
433            "2020-09-08T13:42:29Z",
434            "2020-09-08T13:42:29.190855-05:00",
435            "2020-09-08 12:13:29",
436        ];
437        for date_str in test_cases {
438            let formatted_date_scalar = ScalarValue::Utf8(Some(date_str.into()));
439
440            let to_date_result = invoke_to_date_with_args(
441                vec![ColumnarValue::Scalar(formatted_date_scalar)],
442                1,
443            );
444
445            match to_date_result {
446                Ok(ColumnarValue::Scalar(ScalarValue::Date32(date_val))) => {
447                    let expected = Date32Type::parse_formatted("2020-09-08", "%Y-%m-%d");
448                    assert_eq!(date_val, expected, "to_date created wrong value");
449                }
450                _ => panic!("Conversion of {date_str} failed"),
451            }
452        }
453    }
454
455    #[test]
456    fn test_to_date_string_with_valid_number() {
457        let date_str = "20241231";
458        let date_scalar = ScalarValue::Utf8(Some(date_str.into()));
459
460        let to_date_result =
461            invoke_to_date_with_args(vec![ColumnarValue::Scalar(date_scalar)], 1);
462
463        match to_date_result {
464            Ok(ColumnarValue::Scalar(ScalarValue::Date32(date_val))) => {
465                let expected = Date32Type::parse_formatted("2024-12-31", "%Y-%m-%d");
466                assert_eq!(
467                    date_val, expected,
468                    "to_date created wrong value for {date_str}"
469                );
470            }
471            _ => panic!("Conversion of {date_str} failed"),
472        }
473    }
474
475    #[test]
476    fn test_to_date_string_with_invalid_number() {
477        let date_str = "202412311";
478        let date_scalar = ScalarValue::Utf8(Some(date_str.into()));
479
480        let to_date_result =
481            invoke_to_date_with_args(vec![ColumnarValue::Scalar(date_scalar)], 1);
482
483        if let Ok(ColumnarValue::Scalar(ScalarValue::Date32(_))) = to_date_result {
484            panic!("Conversion of {date_str} succeeded, but should have failed. ");
485        }
486    }
487}