Skip to main content

datafusion_functions/datetime/
to_date.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::datetime::common::*;
19use arrow::compute::cast_with_options;
20use arrow::datatypes::DataType;
21use arrow::datatypes::DataType::*;
22use arrow::error::ArrowError::ParseError;
23use arrow::{array::types::Date32Type, compute::kernels::cast_utils::Parser};
24use datafusion_common::format::DEFAULT_CAST_OPTIONS;
25use datafusion_common::{Result, arrow_err, exec_err, internal_datafusion_err};
26use datafusion_expr::{
27    ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl, Signature,
28    Volatility,
29};
30use datafusion_macros::user_doc;
31
32#[user_doc(
33    doc_section(label = "Time and Date Functions"),
34    description = r"Converts a value to a date (`YYYY-MM-DD`).
35Supports strings, numeric and timestamp types as input.
36Strings are parsed as YYYY-MM-DD (e.g. '2023-07-20') if no [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)s are provided.
37Integers and doubles are interpreted as days since the unix epoch (`1970-01-01T00:00:00Z`).
38Returns the corresponding date.
39
40Note: `to_date` returns Date32, which represents its values as the number of days since unix epoch(`1970-01-01`) stored as signed 32 bit value. The largest supported date value is `9999-12-31`.",
41    syntax_example = "to_date('2017-05-31', '%Y-%m-%d')",
42    sql_example = r#"```sql
43> select to_date('2023-01-31');
44+-------------------------------+
45| to_date(Utf8("2023-01-31")) |
46+-------------------------------+
47| 2023-01-31                    |
48+-------------------------------+
49> select to_date('2023/01/31', '%Y-%m-%d', '%Y/%m/%d');
50+---------------------------------------------------------------------+
51| to_date(Utf8("2023/01/31"),Utf8("%Y-%m-%d"),Utf8("%Y/%m/%d")) |
52+---------------------------------------------------------------------+
53| 2023-01-31                                                          |
54+---------------------------------------------------------------------+
55```
56
57Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/builtin_functions/date_time.rs)
58"#,
59    standard_argument(name = "expression", prefix = "String"),
60    argument(
61        name = "format_n",
62        description = r"Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order
63  they appear with the first successful one being returned. If none of the formats successfully parse the expression
64  an error will be returned."
65    )
66)]
67#[derive(Debug, PartialEq, Eq, Hash)]
68pub struct ToDateFunc {
69    signature: Signature,
70}
71
72impl Default for ToDateFunc {
73    fn default() -> Self {
74        Self::new()
75    }
76}
77
78impl ToDateFunc {
79    pub fn new() -> Self {
80        Self {
81            signature: Signature::variadic_any(Volatility::Immutable),
82        }
83    }
84
85    fn to_date(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
86        match args.len() {
87            1 => handle::<Date32Type, _>(
88                args,
89                |s| match Date32Type::parse(s) {
90                    Some(v) => Ok(v),
91                    None => arrow_err!(ParseError(
92                        "Unable to cast to Date32 for converting from i64 to i32 failed"
93                            .to_string()
94                    )),
95                },
96                "to_date",
97                &Date32,
98            ),
99            2.. => handle_multiple::<Date32Type, _, _>(
100                args,
101                |s, format| {
102                    string_to_timestamp_millis_formatted(s, format)
103                        .map(|n| n / (24 * 60 * 60 * 1_000))
104                        .and_then(|v| {
105                            v.try_into().map_err(|_| {
106                                internal_datafusion_err!("Unable to cast to Date32 for converting from i64 to i32 failed")
107                            })
108                        })
109                },
110                |n| n,
111                "to_date",
112                &Date32,
113            ),
114            0 => exec_err!("Unsupported 0 argument count for function to_date"),
115        }
116    }
117}
118
119impl ScalarUDFImpl for ToDateFunc {
120    fn name(&self) -> &str {
121        "to_date"
122    }
123
124    fn signature(&self) -> &Signature {
125        &self.signature
126    }
127
128    fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
129        Ok(Date32)
130    }
131
132    fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
133        let args = args.args;
134        if args.is_empty() {
135            return exec_err!("to_date function requires 1 or more arguments, got 0");
136        }
137
138        // validate that any args after the first one are Utf8
139        if args.len() > 1 {
140            validate_data_types(&args, "to_date")?;
141        }
142
143        match args[0].data_type() {
144            Null | Int32 | Int64 | Date32 | Date64 | Timestamp(_, _) => {
145                args[0].cast_to(&Date32, None)
146            }
147            UInt8 | UInt16 | UInt32 | UInt64 | Int8 | Int16 => {
148                // Arrow cast doesn't support direct casting of these types to date32
149                // as it only supports Int32 and Int64. To work around that limitation,
150                // use cast_with_options to cast to Int32 and then cast the result of
151                // that to Date32.
152                match &args[0] {
153                    ColumnarValue::Array(array) => {
154                        Ok(ColumnarValue::Array(cast_with_options(
155                            &cast_with_options(&array, &Int32, &DEFAULT_CAST_OPTIONS)?,
156                            &Date32,
157                            &DEFAULT_CAST_OPTIONS,
158                        )?))
159                    }
160                    ColumnarValue::Scalar(scalar) => {
161                        let sv =
162                            scalar.cast_to_with_options(&Int32, &DEFAULT_CAST_OPTIONS)?;
163                        Ok(ColumnarValue::Scalar(
164                            sv.cast_to_with_options(&Date32, &DEFAULT_CAST_OPTIONS)?,
165                        ))
166                    }
167                }
168            }
169            Float16
170            | Float32
171            | Float64
172            | Decimal32(_, _)
173            | Decimal64(_, _)
174            | Decimal128(_, _)
175            | Decimal256(_, _) => {
176                // The only way this makes sense is to get the Int64 value of the float
177                // or decimal and then cast that to Date32.
178                args[0].cast_to(&Int64, None)?.cast_to(&Date32, None)
179            }
180            Utf8View | LargeUtf8 | Utf8 => self.to_date(&args),
181            other => {
182                exec_err!("Unsupported data type {} for function to_date", other)
183            }
184        }
185    }
186
187    fn documentation(&self) -> Option<&Documentation> {
188        self.doc()
189    }
190}
191
192#[cfg(test)]
193mod tests {
194    use super::ToDateFunc;
195    use arrow::array::{Array, Date32Array, GenericStringArray, StringViewArray};
196    use arrow::datatypes::{DataType, Field};
197    use arrow::{compute::kernels::cast_utils::Parser, datatypes::Date32Type};
198    use datafusion_common::config::ConfigOptions;
199    use datafusion_common::{DataFusionError, ScalarValue};
200    use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl};
201    use std::sync::Arc;
202
203    fn invoke_to_date_with_args(
204        args: Vec<ColumnarValue>,
205        number_rows: usize,
206    ) -> Result<ColumnarValue, DataFusionError> {
207        let arg_fields = args
208            .iter()
209            .map(|arg| Field::new("a", arg.data_type(), true).into())
210            .collect::<Vec<_>>();
211
212        let args = ScalarFunctionArgs {
213            args,
214            arg_fields,
215            number_rows,
216            return_field: Field::new("f", DataType::Date32, true).into(),
217            config_options: Arc::new(ConfigOptions::default()),
218        };
219        ToDateFunc::new().invoke_with_args(args)
220    }
221
222    #[test]
223    fn test_to_date_without_format() {
224        struct TestCase {
225            name: &'static str,
226            date_str: &'static str,
227        }
228
229        let test_cases = vec![
230            TestCase {
231                name: "Largest four-digit year (9999)",
232                date_str: "9999-12-31",
233            },
234            TestCase {
235                name: "Year 1 (0001)",
236                date_str: "0001-12-31",
237            },
238            TestCase {
239                name: "Year before epoch (1969)",
240                date_str: "1969-01-01",
241            },
242            TestCase {
243                name: "Switch Julian/Gregorian calendar (1582-10-10)",
244                date_str: "1582-10-10",
245            },
246        ];
247
248        for tc in &test_cases {
249            test_scalar(ScalarValue::Utf8(Some(tc.date_str.to_string())), tc);
250            test_scalar(ScalarValue::LargeUtf8(Some(tc.date_str.to_string())), tc);
251            test_scalar(ScalarValue::Utf8View(Some(tc.date_str.to_string())), tc);
252
253            test_array::<GenericStringArray<i32>>(tc);
254            test_array::<GenericStringArray<i64>>(tc);
255            test_array::<StringViewArray>(tc);
256        }
257
258        fn test_scalar(sv: ScalarValue, tc: &TestCase) {
259            let to_date_result =
260                invoke_to_date_with_args(vec![ColumnarValue::Scalar(sv)], 1);
261
262            match to_date_result {
263                Ok(ColumnarValue::Scalar(ScalarValue::Date32(date_val))) => {
264                    let expected = Date32Type::parse_formatted(tc.date_str, "%Y-%m-%d");
265                    assert_eq!(
266                        date_val, expected,
267                        "{}: to_date created wrong value",
268                        tc.name
269                    );
270                }
271                _ => panic!("Could not convert '{}' to Date", tc.date_str),
272            }
273        }
274
275        fn test_array<A>(tc: &TestCase)
276        where
277            A: From<Vec<&'static str>> + Array + 'static,
278        {
279            let date_array = A::from(vec![tc.date_str]);
280            let batch_len = date_array.len();
281            let to_date_result = invoke_to_date_with_args(
282                vec![ColumnarValue::Array(Arc::new(date_array))],
283                batch_len,
284            );
285
286            match to_date_result {
287                Ok(ColumnarValue::Array(a)) => {
288                    assert_eq!(a.len(), 1);
289
290                    let expected = Date32Type::parse_formatted(tc.date_str, "%Y-%m-%d");
291                    let mut builder = Date32Array::builder(4);
292                    builder.append_value(expected.unwrap());
293
294                    assert_eq!(
295                        &builder.finish() as &dyn Array,
296                        a.as_ref(),
297                        "{}: to_date created wrong value",
298                        tc.name
299                    );
300                }
301                _ => panic!("Could not convert '{}' to Date", tc.date_str),
302            }
303        }
304    }
305
306    #[test]
307    fn test_to_date_with_format() {
308        struct TestCase {
309            name: &'static str,
310            date_str: &'static str,
311            format_str: &'static str,
312            formatted_date: &'static str,
313        }
314
315        let test_cases = vec![
316            TestCase {
317                name: "Largest four-digit year (9999)",
318                date_str: "9999-12-31",
319                format_str: "%Y%m%d",
320                formatted_date: "99991231",
321            },
322            TestCase {
323                name: "Smallest four-digit year (-9999)",
324                date_str: "-9999-12-31",
325                format_str: "%Y/%m/%d",
326                formatted_date: "-9999/12/31",
327            },
328            TestCase {
329                name: "Year 1 (0001)",
330                date_str: "0001-12-31",
331                format_str: "%Y%m%d",
332                formatted_date: "00011231",
333            },
334            TestCase {
335                name: "Year before epoch (1969)",
336                date_str: "1969-01-01",
337                format_str: "%Y%m%d",
338                formatted_date: "19690101",
339            },
340            TestCase {
341                name: "Switch Julian/Gregorian calendar (1582-10-10)",
342                date_str: "1582-10-10",
343                format_str: "%Y%m%d",
344                formatted_date: "15821010",
345            },
346            TestCase {
347                name: "Negative Year, BC (-42-01-01)",
348                date_str: "-42-01-01",
349                format_str: "%Y/%m/%d",
350                formatted_date: "-42/01/01",
351            },
352        ];
353
354        for tc in &test_cases {
355            test_scalar(ScalarValue::Utf8(Some(tc.formatted_date.to_string())), tc);
356            test_scalar(
357                ScalarValue::LargeUtf8(Some(tc.formatted_date.to_string())),
358                tc,
359            );
360            test_scalar(
361                ScalarValue::Utf8View(Some(tc.formatted_date.to_string())),
362                tc,
363            );
364
365            test_array::<GenericStringArray<i32>>(tc);
366            test_array::<GenericStringArray<i64>>(tc);
367            test_array::<StringViewArray>(tc);
368        }
369
370        fn test_scalar(sv: ScalarValue, tc: &TestCase) {
371            let format_scalar = ScalarValue::Utf8(Some(tc.format_str.to_string()));
372
373            let to_date_result = invoke_to_date_with_args(
374                vec![
375                    ColumnarValue::Scalar(sv),
376                    ColumnarValue::Scalar(format_scalar),
377                ],
378                1,
379            );
380
381            match to_date_result {
382                Ok(ColumnarValue::Scalar(ScalarValue::Date32(date_val))) => {
383                    let expected = Date32Type::parse_formatted(tc.date_str, "%Y-%m-%d");
384                    assert_eq!(
385                        date_val, expected,
386                        "{}: to_date created wrong value for date '{}' with format string '{}'",
387                        tc.name, tc.formatted_date, tc.format_str
388                    );
389                }
390                _ => panic!(
391                    "Could not convert '{}' with format string '{}'to Date",
392                    tc.date_str, tc.format_str
393                ),
394            }
395        }
396
397        fn test_array<A>(tc: &TestCase)
398        where
399            A: From<Vec<&'static str>> + Array + 'static,
400        {
401            let date_array = A::from(vec![tc.formatted_date]);
402            let format_array = A::from(vec![tc.format_str]);
403            let batch_len = date_array.len();
404
405            let to_date_result = invoke_to_date_with_args(
406                vec![
407                    ColumnarValue::Array(Arc::new(date_array)),
408                    ColumnarValue::Array(Arc::new(format_array)),
409                ],
410                batch_len,
411            );
412
413            match to_date_result {
414                Ok(ColumnarValue::Array(a)) => {
415                    assert_eq!(a.len(), 1);
416
417                    let expected = Date32Type::parse_formatted(tc.date_str, "%Y-%m-%d");
418                    let mut builder = Date32Array::builder(4);
419                    builder.append_value(expected.unwrap());
420
421                    assert_eq!(
422                        &builder.finish() as &dyn Array,
423                        a.as_ref(),
424                        "{}: to_date created wrong value for date '{}' with format string '{}'",
425                        tc.name,
426                        tc.formatted_date,
427                        tc.format_str
428                    );
429                }
430                _ => panic!(
431                    "Could not convert '{}' with format string '{}'to Date: {:?}",
432                    tc.formatted_date, tc.format_str, to_date_result
433                ),
434            }
435        }
436    }
437
438    #[test]
439    fn test_to_date_multiple_format_strings() {
440        let formatted_date_scalar = ScalarValue::Utf8(Some("2023/01/31".into()));
441        let format1_scalar = ScalarValue::Utf8(Some("%Y-%m-%d".into()));
442        let format2_scalar = ScalarValue::Utf8(Some("%Y/%m/%d".into()));
443
444        let to_date_result = invoke_to_date_with_args(
445            vec![
446                ColumnarValue::Scalar(formatted_date_scalar),
447                ColumnarValue::Scalar(format1_scalar),
448                ColumnarValue::Scalar(format2_scalar),
449            ],
450            1,
451        );
452
453        match to_date_result {
454            Ok(ColumnarValue::Scalar(ScalarValue::Date32(date_val))) => {
455                let expected = Date32Type::parse_formatted("2023-01-31", "%Y-%m-%d");
456                assert_eq!(
457                    date_val, expected,
458                    "to_date created wrong value for date with 2 format strings"
459                );
460            }
461            _ => panic!("Conversion failed",),
462        }
463    }
464
465    #[test]
466    fn test_to_date_from_timestamp() {
467        let test_cases = vec![
468            "2020-09-08T13:42:29Z",
469            "2020-09-08T13:42:29.190855-05:00",
470            "2020-09-08 12:13:29",
471        ];
472        for date_str in test_cases {
473            let formatted_date_scalar = ScalarValue::Utf8(Some(date_str.into()));
474
475            let to_date_result = invoke_to_date_with_args(
476                vec![ColumnarValue::Scalar(formatted_date_scalar)],
477                1,
478            );
479
480            match to_date_result {
481                Ok(ColumnarValue::Scalar(ScalarValue::Date32(date_val))) => {
482                    let expected = Date32Type::parse_formatted("2020-09-08", "%Y-%m-%d");
483                    assert_eq!(date_val, expected, "to_date created wrong value");
484                }
485                _ => panic!("Conversion of {date_str} failed"),
486            }
487        }
488    }
489
490    #[test]
491    fn test_to_date_string_with_valid_number() {
492        let date_str = "20241231";
493        let date_scalar = ScalarValue::Utf8(Some(date_str.into()));
494
495        let to_date_result =
496            invoke_to_date_with_args(vec![ColumnarValue::Scalar(date_scalar)], 1);
497
498        match to_date_result {
499            Ok(ColumnarValue::Scalar(ScalarValue::Date32(date_val))) => {
500                let expected = Date32Type::parse_formatted("2024-12-31", "%Y-%m-%d");
501                assert_eq!(
502                    date_val, expected,
503                    "to_date created wrong value for {date_str}"
504                );
505            }
506            _ => panic!("Conversion of {date_str} failed"),
507        }
508    }
509
510    #[test]
511    fn test_to_date_string_with_invalid_number() {
512        let date_str = "202412311";
513        let date_scalar = ScalarValue::Utf8(Some(date_str.into()));
514
515        let to_date_result =
516            invoke_to_date_with_args(vec![ColumnarValue::Scalar(date_scalar)], 1);
517
518        if let Ok(ColumnarValue::Scalar(ScalarValue::Date32(_))) = to_date_result {
519            panic!("Conversion of {date_str} succeeded, but should have failed. ");
520        }
521    }
522}