1use crate::datetime::common::*;
19use arrow::datatypes::DataType;
20use arrow::datatypes::DataType::*;
21use arrow::error::ArrowError::ParseError;
22use arrow::{array::types::Date32Type, compute::kernels::cast_utils::Parser};
23use datafusion_common::error::DataFusionError;
24use datafusion_common::{arrow_err, exec_err, internal_datafusion_err, Result};
25use datafusion_expr::{
26 ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
27};
28use datafusion_macros::user_doc;
29use std::any::Any;
30
31#[user_doc(
32 doc_section(label = "Time and Date Functions"),
33 description = r"Converts a value to a date (`YYYY-MM-DD`).
34Supports strings, integer and double types as input.
35Strings are parsed as YYYY-MM-DD (e.g. '2023-07-20') if no [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)s are provided.
36Integers and doubles are interpreted as days since the unix epoch (`1970-01-01T00:00:00Z`).
37Returns the corresponding date.
38
39Note: `to_date` returns Date32, which represents its values as the number of days since unix epoch(`1970-01-01`) stored as signed 32 bit value. The largest supported date value is `9999-12-31`.",
40 syntax_example = "to_date('2017-05-31', '%Y-%m-%d')",
41 sql_example = r#"```sql
42> select to_date('2023-01-31');
43+-------------------------------+
44| to_date(Utf8("2023-01-31")) |
45+-------------------------------+
46| 2023-01-31 |
47+-------------------------------+
48> select to_date('2023/01/31', '%Y-%m-%d', '%Y/%m/%d');
49+---------------------------------------------------------------------+
50| to_date(Utf8("2023/01/31"),Utf8("%Y-%m-%d"),Utf8("%Y/%m/%d")) |
51+---------------------------------------------------------------------+
52| 2023-01-31 |
53+---------------------------------------------------------------------+
54```
55
56Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/date_time_functions.rs)
57"#,
58 standard_argument(name = "expression", prefix = "String"),
59 argument(
60 name = "format_n",
61 description = r"Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order
62 they appear with the first successful one being returned. If none of the formats successfully parse the expression
63 an error will be returned."
64 )
65)]
66#[derive(Debug, PartialEq, Eq, Hash)]
67pub struct ToDateFunc {
68 signature: Signature,
69}
70
71impl Default for ToDateFunc {
72 fn default() -> Self {
73 Self::new()
74 }
75}
76
77impl ToDateFunc {
78 pub fn new() -> Self {
79 Self {
80 signature: Signature::variadic_any(Volatility::Immutable),
81 }
82 }
83
84 fn to_date(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
85 match args.len() {
86 1 => handle::<Date32Type, _, Date32Type>(
87 args,
88 |s| match Date32Type::parse(s) {
89 Some(v) => Ok(v),
90 None => arrow_err!(ParseError(
91 "Unable to cast to Date32 for converting from i64 to i32 failed"
92 .to_string()
93 )),
94 },
95 "to_date",
96 ),
97 2.. => handle_multiple::<Date32Type, _, Date32Type, _>(
98 args,
99 |s, format| {
100 string_to_timestamp_millis_formatted(s, format)
101 .map(|n| n / (24 * 60 * 60 * 1_000))
102 .and_then(|v| {
103 v.try_into().map_err(|_| {
104 internal_datafusion_err!("Unable to cast to Date32 for converting from i64 to i32 failed")
105 })
106 })
107 },
108 |n| n,
109 "to_date",
110 ),
111 0 => exec_err!("Unsupported 0 argument count for function to_date"),
112 }
113 }
114}
115
116impl ScalarUDFImpl for ToDateFunc {
117 fn as_any(&self) -> &dyn Any {
118 self
119 }
120
121 fn name(&self) -> &str {
122 "to_date"
123 }
124
125 fn signature(&self) -> &Signature {
126 &self.signature
127 }
128
129 fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
130 Ok(Date32)
131 }
132
133 fn invoke_with_args(
134 &self,
135 args: datafusion_expr::ScalarFunctionArgs,
136 ) -> Result<ColumnarValue> {
137 let args = args.args;
138 if args.is_empty() {
139 return exec_err!("to_date function requires 1 or more arguments, got 0");
140 }
141
142 if args.len() > 1 {
144 validate_data_types(&args, "to_date")?;
145 }
146
147 match args[0].data_type() {
148 Int32 | Int64 | Null | Float64 | Date32 | Date64 => {
149 args[0].cast_to(&Date32, None)
150 }
151 Utf8View | LargeUtf8 | Utf8 => self.to_date(&args),
152 other => {
153 exec_err!("Unsupported data type {:?} for function to_date", other)
154 }
155 }
156 }
157
158 fn documentation(&self) -> Option<&Documentation> {
159 self.doc()
160 }
161}
162
163#[cfg(test)]
164mod tests {
165 use super::ToDateFunc;
166 use arrow::array::{Array, Date32Array, GenericStringArray, StringViewArray};
167 use arrow::datatypes::{DataType, Field};
168 use arrow::{compute::kernels::cast_utils::Parser, datatypes::Date32Type};
169 use datafusion_common::config::ConfigOptions;
170 use datafusion_common::{DataFusionError, ScalarValue};
171 use datafusion_expr::{ColumnarValue, ScalarUDFImpl};
172 use std::sync::Arc;
173
174 fn invoke_to_date_with_args(
175 args: Vec<ColumnarValue>,
176 number_rows: usize,
177 ) -> Result<ColumnarValue, DataFusionError> {
178 let arg_fields = args
179 .iter()
180 .map(|arg| Field::new("a", arg.data_type(), true).into())
181 .collect::<Vec<_>>();
182
183 let args = datafusion_expr::ScalarFunctionArgs {
184 args,
185 arg_fields,
186 number_rows,
187 return_field: Field::new("f", DataType::Date32, true).into(),
188 config_options: Arc::new(ConfigOptions::default()),
189 };
190 ToDateFunc::new().invoke_with_args(args)
191 }
192
193 #[test]
194 fn test_to_date_without_format() {
195 struct TestCase {
196 name: &'static str,
197 date_str: &'static str,
198 }
199
200 let test_cases = vec![
201 TestCase {
202 name: "Largest four-digit year (9999)",
203 date_str: "9999-12-31",
204 },
205 TestCase {
206 name: "Year 1 (0001)",
207 date_str: "0001-12-31",
208 },
209 TestCase {
210 name: "Year before epoch (1969)",
211 date_str: "1969-01-01",
212 },
213 TestCase {
214 name: "Switch Julian/Gregorian calendar (1582-10-10)",
215 date_str: "1582-10-10",
216 },
217 ];
218
219 for tc in &test_cases {
220 test_scalar(ScalarValue::Utf8(Some(tc.date_str.to_string())), tc);
221 test_scalar(ScalarValue::LargeUtf8(Some(tc.date_str.to_string())), tc);
222 test_scalar(ScalarValue::Utf8View(Some(tc.date_str.to_string())), tc);
223
224 test_array::<GenericStringArray<i32>>(tc);
225 test_array::<GenericStringArray<i64>>(tc);
226 test_array::<StringViewArray>(tc);
227 }
228
229 fn test_scalar(sv: ScalarValue, tc: &TestCase) {
230 let to_date_result =
231 invoke_to_date_with_args(vec![ColumnarValue::Scalar(sv)], 1);
232
233 match to_date_result {
234 Ok(ColumnarValue::Scalar(ScalarValue::Date32(date_val))) => {
235 let expected = Date32Type::parse_formatted(tc.date_str, "%Y-%m-%d");
236 assert_eq!(
237 date_val, expected,
238 "{}: to_date created wrong value",
239 tc.name
240 );
241 }
242 _ => panic!("Could not convert '{}' to Date", tc.date_str),
243 }
244 }
245
246 fn test_array<A>(tc: &TestCase)
247 where
248 A: From<Vec<&'static str>> + Array + 'static,
249 {
250 let date_array = A::from(vec![tc.date_str]);
251 let batch_len = date_array.len();
252 let to_date_result = invoke_to_date_with_args(
253 vec![ColumnarValue::Array(Arc::new(date_array))],
254 batch_len,
255 );
256
257 match to_date_result {
258 Ok(ColumnarValue::Array(a)) => {
259 assert_eq!(a.len(), 1);
260
261 let expected = Date32Type::parse_formatted(tc.date_str, "%Y-%m-%d");
262 let mut builder = Date32Array::builder(4);
263 builder.append_value(expected.unwrap());
264
265 assert_eq!(
266 &builder.finish() as &dyn Array,
267 a.as_ref(),
268 "{}: to_date created wrong value",
269 tc.name
270 );
271 }
272 _ => panic!("Could not convert '{}' to Date", tc.date_str),
273 }
274 }
275 }
276
277 #[test]
278 fn test_to_date_with_format() {
279 struct TestCase {
280 name: &'static str,
281 date_str: &'static str,
282 format_str: &'static str,
283 formatted_date: &'static str,
284 }
285
286 let test_cases = vec![
287 TestCase {
288 name: "Largest four-digit year (9999)",
289 date_str: "9999-12-31",
290 format_str: "%Y%m%d",
291 formatted_date: "99991231",
292 },
293 TestCase {
294 name: "Smallest four-digit year (-9999)",
295 date_str: "-9999-12-31",
296 format_str: "%Y/%m/%d",
297 formatted_date: "-9999/12/31",
298 },
299 TestCase {
300 name: "Year 1 (0001)",
301 date_str: "0001-12-31",
302 format_str: "%Y%m%d",
303 formatted_date: "00011231",
304 },
305 TestCase {
306 name: "Year before epoch (1969)",
307 date_str: "1969-01-01",
308 format_str: "%Y%m%d",
309 formatted_date: "19690101",
310 },
311 TestCase {
312 name: "Switch Julian/Gregorian calendar (1582-10-10)",
313 date_str: "1582-10-10",
314 format_str: "%Y%m%d",
315 formatted_date: "15821010",
316 },
317 TestCase {
318 name: "Negative Year, BC (-42-01-01)",
319 date_str: "-42-01-01",
320 format_str: "%Y/%m/%d",
321 formatted_date: "-42/01/01",
322 },
323 ];
324
325 for tc in &test_cases {
326 test_scalar(ScalarValue::Utf8(Some(tc.formatted_date.to_string())), tc);
327 test_scalar(
328 ScalarValue::LargeUtf8(Some(tc.formatted_date.to_string())),
329 tc,
330 );
331 test_scalar(
332 ScalarValue::Utf8View(Some(tc.formatted_date.to_string())),
333 tc,
334 );
335
336 test_array::<GenericStringArray<i32>>(tc);
337 test_array::<GenericStringArray<i64>>(tc);
338 test_array::<StringViewArray>(tc);
339 }
340
341 fn test_scalar(sv: ScalarValue, tc: &TestCase) {
342 let format_scalar = ScalarValue::Utf8(Some(tc.format_str.to_string()));
343
344 let to_date_result = invoke_to_date_with_args(
345 vec![
346 ColumnarValue::Scalar(sv),
347 ColumnarValue::Scalar(format_scalar),
348 ],
349 1,
350 );
351
352 match to_date_result {
353 Ok(ColumnarValue::Scalar(ScalarValue::Date32(date_val))) => {
354 let expected = Date32Type::parse_formatted(tc.date_str, "%Y-%m-%d");
355 assert_eq!(date_val, expected, "{}: to_date created wrong value for date '{}' with format string '{}'", tc.name, tc.formatted_date, tc.format_str);
356 }
357 _ => panic!(
358 "Could not convert '{}' with format string '{}'to Date",
359 tc.date_str, tc.format_str
360 ),
361 }
362 }
363
364 fn test_array<A>(tc: &TestCase)
365 where
366 A: From<Vec<&'static str>> + Array + 'static,
367 {
368 let date_array = A::from(vec![tc.formatted_date]);
369 let format_array = A::from(vec![tc.format_str]);
370 let batch_len = date_array.len();
371
372 let to_date_result = invoke_to_date_with_args(
373 vec![
374 ColumnarValue::Array(Arc::new(date_array)),
375 ColumnarValue::Array(Arc::new(format_array)),
376 ],
377 batch_len,
378 );
379
380 match to_date_result {
381 Ok(ColumnarValue::Array(a)) => {
382 assert_eq!(a.len(), 1);
383
384 let expected = Date32Type::parse_formatted(tc.date_str, "%Y-%m-%d");
385 let mut builder = Date32Array::builder(4);
386 builder.append_value(expected.unwrap());
387
388 assert_eq!(
389 &builder.finish() as &dyn Array, a.as_ref(),
390 "{}: to_date created wrong value for date '{}' with format string '{}'",
391 tc.name,
392 tc.formatted_date,
393 tc.format_str
394 );
395 }
396 _ => panic!(
397 "Could not convert '{}' with format string '{}'to Date: {:?}",
398 tc.formatted_date, tc.format_str, to_date_result
399 ),
400 }
401 }
402 }
403
404 #[test]
405 fn test_to_date_multiple_format_strings() {
406 let formatted_date_scalar = ScalarValue::Utf8(Some("2023/01/31".into()));
407 let format1_scalar = ScalarValue::Utf8(Some("%Y-%m-%d".into()));
408 let format2_scalar = ScalarValue::Utf8(Some("%Y/%m/%d".into()));
409
410 let to_date_result = invoke_to_date_with_args(
411 vec![
412 ColumnarValue::Scalar(formatted_date_scalar),
413 ColumnarValue::Scalar(format1_scalar),
414 ColumnarValue::Scalar(format2_scalar),
415 ],
416 1,
417 );
418
419 match to_date_result {
420 Ok(ColumnarValue::Scalar(ScalarValue::Date32(date_val))) => {
421 let expected = Date32Type::parse_formatted("2023-01-31", "%Y-%m-%d");
422 assert_eq!(
423 date_val, expected,
424 "to_date created wrong value for date with 2 format strings"
425 );
426 }
427 _ => panic!("Conversion failed",),
428 }
429 }
430
431 #[test]
432 fn test_to_date_from_timestamp() {
433 let test_cases = vec![
434 "2020-09-08T13:42:29Z",
435 "2020-09-08T13:42:29.190855-05:00",
436 "2020-09-08 12:13:29",
437 ];
438 for date_str in test_cases {
439 let formatted_date_scalar = ScalarValue::Utf8(Some(date_str.into()));
440
441 let to_date_result = invoke_to_date_with_args(
442 vec![ColumnarValue::Scalar(formatted_date_scalar)],
443 1,
444 );
445
446 match to_date_result {
447 Ok(ColumnarValue::Scalar(ScalarValue::Date32(date_val))) => {
448 let expected = Date32Type::parse_formatted("2020-09-08", "%Y-%m-%d");
449 assert_eq!(date_val, expected, "to_date created wrong value");
450 }
451 _ => panic!("Conversion of {date_str} failed"),
452 }
453 }
454 }
455
456 #[test]
457 fn test_to_date_string_with_valid_number() {
458 let date_str = "20241231";
459 let date_scalar = ScalarValue::Utf8(Some(date_str.into()));
460
461 let to_date_result =
462 invoke_to_date_with_args(vec![ColumnarValue::Scalar(date_scalar)], 1);
463
464 match to_date_result {
465 Ok(ColumnarValue::Scalar(ScalarValue::Date32(date_val))) => {
466 let expected = Date32Type::parse_formatted("2024-12-31", "%Y-%m-%d");
467 assert_eq!(
468 date_val, expected,
469 "to_date created wrong value for {date_str}"
470 );
471 }
472 _ => panic!("Conversion of {date_str} failed"),
473 }
474 }
475
476 #[test]
477 fn test_to_date_string_with_invalid_number() {
478 let date_str = "202412311";
479 let date_scalar = ScalarValue::Utf8(Some(date_str.into()));
480
481 let to_date_result =
482 invoke_to_date_with_args(vec![ColumnarValue::Scalar(date_scalar)], 1);
483
484 if let Ok(ColumnarValue::Scalar(ScalarValue::Date32(_))) = to_date_result {
485 panic!("Conversion of {date_str} succeeded, but should have failed. ");
486 }
487 }
488}