1use std::any::Any;
19use std::sync::Arc;
20
21use crate::datetime::common::*;
22use arrow::array::Float64Array;
23use arrow::datatypes::DataType::*;
24use arrow::datatypes::TimeUnit::{Microsecond, Millisecond, Nanosecond, Second};
25use arrow::datatypes::{
26 ArrowTimestampType, DataType, TimeUnit, TimestampMicrosecondType,
27 TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType,
28};
29use datafusion_common::format::DEFAULT_CAST_OPTIONS;
30use datafusion_common::{exec_err, Result, ScalarType, ScalarValue};
31use datafusion_expr::{
32 ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
33};
34use datafusion_macros::user_doc;
35
36#[user_doc(
37 doc_section(label = "Time and Date Functions"),
38 description = r#"
39Converts a value to a timestamp (`YYYY-MM-DDT00:00:00Z`). Supports strings, integer, unsigned integer, and double types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono formats] are provided. Integers, unsigned integers, and doubles are interpreted as seconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp.
40
41Note: `to_timestamp` returns `Timestamp(ns)`. The supported range for integer input is between `-9223372037` and `9223372036`. Supported range for string input is between `1677-09-21T00:12:44.0` and `2262-04-11T23:47:16.0`. Please use `to_timestamp_seconds` for the input outside of supported bounds.
42"#,
43 syntax_example = "to_timestamp(expression[, ..., format_n])",
44 sql_example = r#"```sql
45> select to_timestamp('2023-01-31T09:26:56.123456789-05:00');
46+-----------------------------------------------------------+
47| to_timestamp(Utf8("2023-01-31T09:26:56.123456789-05:00")) |
48+-----------------------------------------------------------+
49| 2023-01-31T14:26:56.123456789 |
50+-----------------------------------------------------------+
51> select to_timestamp('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y');
52+--------------------------------------------------------------------------------------------------------+
53| to_timestamp(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) |
54+--------------------------------------------------------------------------------------------------------+
55| 2023-05-17T03:59:00.123456789 |
56+--------------------------------------------------------------------------------------------------------+
57```
58Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/date_time_functions.rs)
59"#,
60 argument(
61 name = "expression",
62 description = "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators."
63 ),
64 argument(
65 name = "format_n",
66 description = "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned."
67 )
68)]
69#[derive(Debug, PartialEq, Eq, Hash)]
70pub struct ToTimestampFunc {
71 signature: Signature,
72}
73
74#[user_doc(
75 doc_section(label = "Time and Date Functions"),
76 description = "Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000Z`). Supports strings, integer, and unsigned integer types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)s are provided. Integers and unsigned integers are interpreted as seconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp.",
77 syntax_example = "to_timestamp_seconds(expression[, ..., format_n])",
78 sql_example = r#"```sql
79> select to_timestamp_seconds('2023-01-31T09:26:56.123456789-05:00');
80+-------------------------------------------------------------------+
81| to_timestamp_seconds(Utf8("2023-01-31T09:26:56.123456789-05:00")) |
82+-------------------------------------------------------------------+
83| 2023-01-31T14:26:56 |
84+-------------------------------------------------------------------+
85> select to_timestamp_seconds('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y');
86+----------------------------------------------------------------------------------------------------------------+
87| to_timestamp_seconds(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) |
88+----------------------------------------------------------------------------------------------------------------+
89| 2023-05-17T03:59:00 |
90+----------------------------------------------------------------------------------------------------------------+
91```
92Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/date_time_functions.rs)
93"#,
94 argument(
95 name = "expression",
96 description = "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators."
97 ),
98 argument(
99 name = "format_n",
100 description = "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned."
101 )
102)]
103#[derive(Debug, PartialEq, Eq, Hash)]
104pub struct ToTimestampSecondsFunc {
105 signature: Signature,
106}
107
108#[user_doc(
109 doc_section(label = "Time and Date Functions"),
110 description = "Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000Z`). Supports strings, integer, and unsigned integer types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono formats](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) are provided. Integers and unsigned integers are interpreted as milliseconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp.",
111 syntax_example = "to_timestamp_millis(expression[, ..., format_n])",
112 sql_example = r#"```sql
113> select to_timestamp_millis('2023-01-31T09:26:56.123456789-05:00');
114+------------------------------------------------------------------+
115| to_timestamp_millis(Utf8("2023-01-31T09:26:56.123456789-05:00")) |
116+------------------------------------------------------------------+
117| 2023-01-31T14:26:56.123 |
118+------------------------------------------------------------------+
119> select to_timestamp_millis('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y');
120+---------------------------------------------------------------------------------------------------------------+
121| to_timestamp_millis(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) |
122+---------------------------------------------------------------------------------------------------------------+
123| 2023-05-17T03:59:00.123 |
124+---------------------------------------------------------------------------------------------------------------+
125```
126Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/date_time_functions.rs)
127"#,
128 argument(
129 name = "expression",
130 description = "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators."
131 ),
132 argument(
133 name = "format_n",
134 description = "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned."
135 )
136)]
137#[derive(Debug, PartialEq, Eq, Hash)]
138pub struct ToTimestampMillisFunc {
139 signature: Signature,
140}
141
142#[user_doc(
143 doc_section(label = "Time and Date Functions"),
144 description = "Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000000Z`). Supports strings, integer, and unsigned integer types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)s are provided. Integers and unsigned integers are interpreted as microseconds since the unix epoch (`1970-01-01T00:00:00Z`) Returns the corresponding timestamp.",
145 syntax_example = "to_timestamp_micros(expression[, ..., format_n])",
146 sql_example = r#"```sql
147> select to_timestamp_micros('2023-01-31T09:26:56.123456789-05:00');
148+------------------------------------------------------------------+
149| to_timestamp_micros(Utf8("2023-01-31T09:26:56.123456789-05:00")) |
150+------------------------------------------------------------------+
151| 2023-01-31T14:26:56.123456 |
152+------------------------------------------------------------------+
153> select to_timestamp_micros('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y');
154+---------------------------------------------------------------------------------------------------------------+
155| to_timestamp_micros(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) |
156+---------------------------------------------------------------------------------------------------------------+
157| 2023-05-17T03:59:00.123456 |
158+---------------------------------------------------------------------------------------------------------------+
159```
160Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/date_time_functions.rs)
161"#,
162 argument(
163 name = "expression",
164 description = "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators."
165 ),
166 argument(
167 name = "format_n",
168 description = "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned."
169 )
170)]
171#[derive(Debug, PartialEq, Eq, Hash)]
172pub struct ToTimestampMicrosFunc {
173 signature: Signature,
174}
175
176#[user_doc(
177 doc_section(label = "Time and Date Functions"),
178 description = "Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000000000Z`). Supports strings, integer, and unsigned integer types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)s are provided. Integers and unsigned integers are interpreted as nanoseconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp.",
179 syntax_example = "to_timestamp_nanos(expression[, ..., format_n])",
180 sql_example = r#"```sql
181> select to_timestamp_nanos('2023-01-31T09:26:56.123456789-05:00');
182+-----------------------------------------------------------------+
183| to_timestamp_nanos(Utf8("2023-01-31T09:26:56.123456789-05:00")) |
184+-----------------------------------------------------------------+
185| 2023-01-31T14:26:56.123456789 |
186+-----------------------------------------------------------------+
187> select to_timestamp_nanos('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y');
188+--------------------------------------------------------------------------------------------------------------+
189| to_timestamp_nanos(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) |
190+--------------------------------------------------------------------------------------------------------------+
191| 2023-05-17T03:59:00.123456789 |
192+---------------------------------------------------------------------------------------------------------------+
193```
194Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/date_time_functions.rs)
195"#,
196 argument(
197 name = "expression",
198 description = "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators."
199 ),
200 argument(
201 name = "format_n",
202 description = "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned."
203 )
204)]
205#[derive(Debug, PartialEq, Eq, Hash)]
206pub struct ToTimestampNanosFunc {
207 signature: Signature,
208}
209
210impl Default for ToTimestampFunc {
211 fn default() -> Self {
212 Self::new()
213 }
214}
215
216impl ToTimestampFunc {
217 pub fn new() -> Self {
218 Self {
219 signature: Signature::variadic_any(Volatility::Immutable),
220 }
221 }
222}
223
224impl Default for ToTimestampSecondsFunc {
225 fn default() -> Self {
226 Self::new()
227 }
228}
229
230impl ToTimestampSecondsFunc {
231 pub fn new() -> Self {
232 Self {
233 signature: Signature::variadic_any(Volatility::Immutable),
234 }
235 }
236}
237
238impl Default for ToTimestampMillisFunc {
239 fn default() -> Self {
240 Self::new()
241 }
242}
243
244impl ToTimestampMillisFunc {
245 pub fn new() -> Self {
246 Self {
247 signature: Signature::variadic_any(Volatility::Immutable),
248 }
249 }
250}
251
252impl Default for ToTimestampMicrosFunc {
253 fn default() -> Self {
254 Self::new()
255 }
256}
257
258impl ToTimestampMicrosFunc {
259 pub fn new() -> Self {
260 Self {
261 signature: Signature::variadic_any(Volatility::Immutable),
262 }
263 }
264}
265
266impl Default for ToTimestampNanosFunc {
267 fn default() -> Self {
268 Self::new()
269 }
270}
271
272impl ToTimestampNanosFunc {
273 pub fn new() -> Self {
274 Self {
275 signature: Signature::variadic_any(Volatility::Immutable),
276 }
277 }
278}
279
280impl ScalarUDFImpl for ToTimestampFunc {
287 fn as_any(&self) -> &dyn Any {
288 self
289 }
290
291 fn name(&self) -> &str {
292 "to_timestamp"
293 }
294
295 fn signature(&self) -> &Signature {
296 &self.signature
297 }
298
299 fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
300 Ok(return_type_for(&arg_types[0], Nanosecond))
301 }
302
303 fn invoke_with_args(
304 &self,
305 args: datafusion_expr::ScalarFunctionArgs,
306 ) -> Result<ColumnarValue> {
307 let args = args.args;
308 if args.is_empty() {
309 return exec_err!(
310 "to_timestamp function requires 1 or more arguments, got {}",
311 args.len()
312 );
313 }
314
315 if args.len() > 1 {
317 validate_data_types(&args, "to_timestamp")?;
318 }
319
320 match args[0].data_type() {
321 Int32 | Int64 => args[0]
322 .cast_to(&Timestamp(Second, None), None)?
323 .cast_to(&Timestamp(Nanosecond, None), None),
324 Null | Timestamp(_, None) => {
325 args[0].cast_to(&Timestamp(Nanosecond, None), None)
326 }
327 Float64 => {
328 let rescaled = arrow::compute::kernels::numeric::mul(
329 &args[0].to_array(1)?,
330 &arrow::array::Scalar::new(Float64Array::from(vec![
331 1_000_000_000f64,
332 ])),
333 )?;
334 Ok(ColumnarValue::Array(arrow::compute::cast_with_options(
335 &rescaled,
336 &Timestamp(Nanosecond, None),
337 &DEFAULT_CAST_OPTIONS,
338 )?))
339 }
340 Timestamp(_, Some(tz)) => {
341 args[0].cast_to(&Timestamp(Nanosecond, Some(tz)), None)
342 }
343 Utf8View | LargeUtf8 | Utf8 => {
344 to_timestamp_impl::<TimestampNanosecondType>(&args, "to_timestamp")
345 }
346 Decimal128(_, _) => {
347 match &args[0] {
348 ColumnarValue::Scalar(ScalarValue::Decimal128(
349 Some(value),
350 _,
351 scale,
352 )) => {
353 let scale_factor = 10_i128.pow(*scale as u32);
355 let seconds = value / scale_factor;
356 let fraction = value % scale_factor;
357
358 let nanos = (fraction * 1_000_000_000) / scale_factor;
359
360 let timestamp_nanos = seconds * 1_000_000_000 + nanos;
361
362 Ok(ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(
363 Some(timestamp_nanos as i64),
364 None,
365 )))
366 }
367 _ => exec_err!("Invalid decimal value"),
368 }
369 }
370 other => {
371 exec_err!("Unsupported data type {other} for function to_timestamp")
372 }
373 }
374 }
375 fn documentation(&self) -> Option<&Documentation> {
376 self.doc()
377 }
378}
379
380impl ScalarUDFImpl for ToTimestampSecondsFunc {
381 fn as_any(&self) -> &dyn Any {
382 self
383 }
384
385 fn name(&self) -> &str {
386 "to_timestamp_seconds"
387 }
388
389 fn signature(&self) -> &Signature {
390 &self.signature
391 }
392
393 fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
394 Ok(return_type_for(&arg_types[0], Second))
395 }
396
397 fn invoke_with_args(
398 &self,
399 args: datafusion_expr::ScalarFunctionArgs,
400 ) -> Result<ColumnarValue> {
401 let args = args.args;
402 if args.is_empty() {
403 return exec_err!(
404 "to_timestamp_seconds function requires 1 or more arguments, got {}",
405 args.len()
406 );
407 }
408
409 if args.len() > 1 {
411 validate_data_types(&args, "to_timestamp")?;
412 }
413
414 match args[0].data_type() {
415 Null | Int32 | Int64 | Timestamp(_, None) | Decimal128(_, _) => {
416 args[0].cast_to(&Timestamp(Second, None), None)
417 }
418 Timestamp(_, Some(tz)) => args[0].cast_to(&Timestamp(Second, Some(tz)), None),
419 Utf8View | LargeUtf8 | Utf8 => {
420 to_timestamp_impl::<TimestampSecondType>(&args, "to_timestamp_seconds")
421 }
422 other => {
423 exec_err!(
424 "Unsupported data type {} for function to_timestamp_seconds",
425 other
426 )
427 }
428 }
429 }
430 fn documentation(&self) -> Option<&Documentation> {
431 self.doc()
432 }
433}
434
435impl ScalarUDFImpl for ToTimestampMillisFunc {
436 fn as_any(&self) -> &dyn Any {
437 self
438 }
439
440 fn name(&self) -> &str {
441 "to_timestamp_millis"
442 }
443
444 fn signature(&self) -> &Signature {
445 &self.signature
446 }
447
448 fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
449 Ok(return_type_for(&arg_types[0], Millisecond))
450 }
451
452 fn invoke_with_args(
453 &self,
454 args: datafusion_expr::ScalarFunctionArgs,
455 ) -> Result<ColumnarValue> {
456 let args = args.args;
457 if args.is_empty() {
458 return exec_err!(
459 "to_timestamp_millis function requires 1 or more arguments, got {}",
460 args.len()
461 );
462 }
463
464 if args.len() > 1 {
466 validate_data_types(&args, "to_timestamp")?;
467 }
468
469 match args[0].data_type() {
470 Null | Int32 | Int64 | Timestamp(_, None) => {
471 args[0].cast_to(&Timestamp(Millisecond, None), None)
472 }
473 Timestamp(_, Some(tz)) => {
474 args[0].cast_to(&Timestamp(Millisecond, Some(tz)), None)
475 }
476 Utf8View | LargeUtf8 | Utf8 => to_timestamp_impl::<TimestampMillisecondType>(
477 &args,
478 "to_timestamp_millis",
479 ),
480 other => {
481 exec_err!(
482 "Unsupported data type {} for function to_timestamp_millis",
483 other
484 )
485 }
486 }
487 }
488 fn documentation(&self) -> Option<&Documentation> {
489 self.doc()
490 }
491}
492
493impl ScalarUDFImpl for ToTimestampMicrosFunc {
494 fn as_any(&self) -> &dyn Any {
495 self
496 }
497
498 fn name(&self) -> &str {
499 "to_timestamp_micros"
500 }
501
502 fn signature(&self) -> &Signature {
503 &self.signature
504 }
505
506 fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
507 Ok(return_type_for(&arg_types[0], Microsecond))
508 }
509
510 fn invoke_with_args(
511 &self,
512 args: datafusion_expr::ScalarFunctionArgs,
513 ) -> Result<ColumnarValue> {
514 let args = args.args;
515 if args.is_empty() {
516 return exec_err!(
517 "to_timestamp_micros function requires 1 or more arguments, got {}",
518 args.len()
519 );
520 }
521
522 if args.len() > 1 {
524 validate_data_types(&args, "to_timestamp")?;
525 }
526
527 match args[0].data_type() {
528 Null | Int32 | Int64 | Timestamp(_, None) => {
529 args[0].cast_to(&Timestamp(Microsecond, None), None)
530 }
531 Timestamp(_, Some(tz)) => {
532 args[0].cast_to(&Timestamp(Microsecond, Some(tz)), None)
533 }
534 Utf8View | LargeUtf8 | Utf8 => to_timestamp_impl::<TimestampMicrosecondType>(
535 &args,
536 "to_timestamp_micros",
537 ),
538 other => {
539 exec_err!(
540 "Unsupported data type {} for function to_timestamp_micros",
541 other
542 )
543 }
544 }
545 }
546 fn documentation(&self) -> Option<&Documentation> {
547 self.doc()
548 }
549}
550
551impl ScalarUDFImpl for ToTimestampNanosFunc {
552 fn as_any(&self) -> &dyn Any {
553 self
554 }
555
556 fn name(&self) -> &str {
557 "to_timestamp_nanos"
558 }
559
560 fn signature(&self) -> &Signature {
561 &self.signature
562 }
563
564 fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
565 Ok(return_type_for(&arg_types[0], Nanosecond))
566 }
567
568 fn invoke_with_args(
569 &self,
570 args: datafusion_expr::ScalarFunctionArgs,
571 ) -> Result<ColumnarValue> {
572 let args = args.args;
573 if args.is_empty() {
574 return exec_err!(
575 "to_timestamp_nanos function requires 1 or more arguments, got {}",
576 args.len()
577 );
578 }
579
580 if args.len() > 1 {
582 validate_data_types(&args, "to_timestamp")?;
583 }
584
585 match args[0].data_type() {
586 Null | Int32 | Int64 | Timestamp(_, None) => {
587 args[0].cast_to(&Timestamp(Nanosecond, None), None)
588 }
589 Timestamp(_, Some(tz)) => {
590 args[0].cast_to(&Timestamp(Nanosecond, Some(tz)), None)
591 }
592 Utf8View | LargeUtf8 | Utf8 => {
593 to_timestamp_impl::<TimestampNanosecondType>(&args, "to_timestamp_nanos")
594 }
595 other => {
596 exec_err!(
597 "Unsupported data type {} for function to_timestamp_nanos",
598 other
599 )
600 }
601 }
602 }
603 fn documentation(&self) -> Option<&Documentation> {
604 self.doc()
605 }
606}
607
608fn return_type_for(arg: &DataType, unit: TimeUnit) -> DataType {
611 match arg {
612 Timestamp(_, Some(tz)) => Timestamp(unit, Some(Arc::clone(tz))),
613 _ => Timestamp(unit, None),
614 }
615}
616
617fn to_timestamp_impl<T: ArrowTimestampType + ScalarType<i64>>(
618 args: &[ColumnarValue],
619 name: &str,
620) -> Result<ColumnarValue> {
621 let factor = match T::UNIT {
622 Second => 1_000_000_000,
623 Millisecond => 1_000_000,
624 Microsecond => 1_000,
625 Nanosecond => 1,
626 };
627
628 match args.len() {
629 1 => handle::<T, _, T>(
630 args,
631 |s| string_to_timestamp_nanos_shim(s).map(|n| n / factor),
632 name,
633 ),
634 n if n >= 2 => handle_multiple::<T, _, T, _>(
635 args,
636 string_to_timestamp_nanos_formatted,
637 |n| n / factor,
638 name,
639 ),
640 _ => exec_err!("Unsupported 0 argument count for function {name}"),
641 }
642}
643
644#[cfg(test)]
645mod tests {
646 use std::sync::Arc;
647
648 use arrow::array::types::Int64Type;
649 use arrow::array::{
650 Array, PrimitiveArray, TimestampMicrosecondArray, TimestampMillisecondArray,
651 TimestampNanosecondArray, TimestampSecondArray,
652 };
653 use arrow::array::{ArrayRef, Int64Array, StringBuilder};
654 use arrow::datatypes::{Field, TimeUnit};
655 use chrono::Utc;
656 use datafusion_common::config::ConfigOptions;
657 use datafusion_common::{assert_contains, DataFusionError, ScalarValue};
658 use datafusion_expr::ScalarFunctionImplementation;
659
660 use super::*;
661
662 fn to_timestamp(args: &[ColumnarValue]) -> Result<ColumnarValue> {
663 to_timestamp_impl::<TimestampNanosecondType>(args, "to_timestamp")
664 }
665
666 fn to_timestamp_millis(args: &[ColumnarValue]) -> Result<ColumnarValue> {
668 to_timestamp_impl::<TimestampMillisecondType>(args, "to_timestamp_millis")
669 }
670
671 fn to_timestamp_micros(args: &[ColumnarValue]) -> Result<ColumnarValue> {
673 to_timestamp_impl::<TimestampMicrosecondType>(args, "to_timestamp_micros")
674 }
675
676 fn to_timestamp_nanos(args: &[ColumnarValue]) -> Result<ColumnarValue> {
678 to_timestamp_impl::<TimestampNanosecondType>(args, "to_timestamp_nanos")
679 }
680
681 fn to_timestamp_seconds(args: &[ColumnarValue]) -> Result<ColumnarValue> {
683 to_timestamp_impl::<TimestampSecondType>(args, "to_timestamp_seconds")
684 }
685
686 #[test]
687 fn to_timestamp_arrays_and_nulls() -> Result<()> {
688 let mut string_builder = StringBuilder::with_capacity(2, 1024);
691 let mut ts_builder = TimestampNanosecondArray::builder(2);
692
693 string_builder.append_value("2020-09-08T13:42:29.190855");
694 ts_builder.append_value(1599572549190855000);
695
696 string_builder.append_null();
697 ts_builder.append_null();
698 let expected_timestamps = &ts_builder.finish() as &dyn Array;
699
700 let string_array =
701 ColumnarValue::Array(Arc::new(string_builder.finish()) as ArrayRef);
702 let parsed_timestamps = to_timestamp(&[string_array])
703 .expect("that to_timestamp parsed values without error");
704 if let ColumnarValue::Array(parsed_array) = parsed_timestamps {
705 assert_eq!(parsed_array.len(), 2);
706 assert_eq!(expected_timestamps, parsed_array.as_ref());
707 } else {
708 panic!("Expected a columnar array")
709 }
710 Ok(())
711 }
712
713 #[test]
714 fn to_timestamp_with_formats_arrays_and_nulls() -> Result<()> {
715 let mut date_string_builder = StringBuilder::with_capacity(2, 1024);
718 let mut format1_builder = StringBuilder::with_capacity(2, 1024);
719 let mut format2_builder = StringBuilder::with_capacity(2, 1024);
720 let mut format3_builder = StringBuilder::with_capacity(2, 1024);
721 let mut ts_builder = TimestampNanosecondArray::builder(2);
722
723 date_string_builder.append_null();
724 format1_builder.append_null();
725 format2_builder.append_null();
726 format3_builder.append_null();
727 ts_builder.append_null();
728
729 date_string_builder.append_value("2020-09-08T13:42:29.19085Z");
730 format1_builder.append_value("%s");
731 format2_builder.append_value("%c");
732 format3_builder.append_value("%+");
733 ts_builder.append_value(1599572549190850000);
734
735 let expected_timestamps = &ts_builder.finish() as &dyn Array;
736
737 let string_array = [
738 ColumnarValue::Array(Arc::new(date_string_builder.finish()) as ArrayRef),
739 ColumnarValue::Array(Arc::new(format1_builder.finish()) as ArrayRef),
740 ColumnarValue::Array(Arc::new(format2_builder.finish()) as ArrayRef),
741 ColumnarValue::Array(Arc::new(format3_builder.finish()) as ArrayRef),
742 ];
743 let parsed_timestamps = to_timestamp(&string_array)
744 .expect("that to_timestamp with format args parsed values without error");
745 if let ColumnarValue::Array(parsed_array) = parsed_timestamps {
746 assert_eq!(parsed_array.len(), 2);
747 assert_eq!(expected_timestamps, parsed_array.as_ref());
748 } else {
749 panic!("Expected a columnar array")
750 }
751 Ok(())
752 }
753
754 #[test]
755 fn to_timestamp_invalid_input_type() -> Result<()> {
756 let mut builder = Int64Array::builder(1);
760 builder.append_value(1);
761 let int64array = ColumnarValue::Array(Arc::new(builder.finish()));
762
763 let expected_err =
764 "Execution error: Unsupported data type Int64 for function to_timestamp";
765 match to_timestamp(&[int64array]) {
766 Ok(_) => panic!("Expected error but got success"),
767 Err(e) => {
768 assert!(
769 e.to_string().contains(expected_err),
770 "Can not find expected error '{expected_err}'. Actual error '{e}'"
771 );
772 }
773 }
774 Ok(())
775 }
776
777 #[test]
778 fn to_timestamp_with_formats_invalid_input_type() -> Result<()> {
779 let mut builder = Int64Array::builder(1);
783 builder.append_value(1);
784 let int64array = [
785 ColumnarValue::Array(Arc::new(builder.finish())),
786 ColumnarValue::Array(Arc::new(builder.finish())),
787 ];
788
789 let expected_err =
790 "Execution error: Unsupported data type Int64 for function to_timestamp";
791 match to_timestamp(&int64array) {
792 Ok(_) => panic!("Expected error but got success"),
793 Err(e) => {
794 assert!(
795 e.to_string().contains(expected_err),
796 "Can not find expected error '{expected_err}'. Actual error '{e}'"
797 );
798 }
799 }
800 Ok(())
801 }
802
803 #[test]
804 fn to_timestamp_with_unparsable_data() -> Result<()> {
805 let mut date_string_builder = StringBuilder::with_capacity(2, 1024);
806
807 date_string_builder.append_null();
808
809 date_string_builder.append_value("2020-09-08 - 13:42:29.19085Z");
810
811 let string_array =
812 ColumnarValue::Array(Arc::new(date_string_builder.finish()) as ArrayRef);
813
814 let expected_err =
815 "Arrow error: Parser error: Error parsing timestamp from '2020-09-08 - 13:42:29.19085Z': error parsing time";
816 match to_timestamp(&[string_array]) {
817 Ok(_) => panic!("Expected error but got success"),
818 Err(e) => {
819 assert!(
820 e.to_string().contains(expected_err),
821 "Can not find expected error '{expected_err}'. Actual error '{e}'"
822 );
823 }
824 }
825 Ok(())
826 }
827
828 #[test]
829 fn to_timestamp_with_invalid_tz() -> Result<()> {
830 let mut date_string_builder = StringBuilder::with_capacity(2, 1024);
831
832 date_string_builder.append_null();
833
834 date_string_builder.append_value("2020-09-08T13:42:29ZZ");
835
836 let string_array =
837 ColumnarValue::Array(Arc::new(date_string_builder.finish()) as ArrayRef);
838
839 let expected_err =
840 "Arrow error: Parser error: Invalid timezone \"ZZ\": failed to parse timezone";
841 match to_timestamp(&[string_array]) {
842 Ok(_) => panic!("Expected error but got success"),
843 Err(e) => {
844 assert!(
845 e.to_string().contains(expected_err),
846 "Can not find expected error '{expected_err}'. Actual error '{e}'"
847 );
848 }
849 }
850 Ok(())
851 }
852
853 #[test]
854 fn to_timestamp_with_no_matching_formats() -> Result<()> {
855 let mut date_string_builder = StringBuilder::with_capacity(2, 1024);
856 let mut format1_builder = StringBuilder::with_capacity(2, 1024);
857 let mut format2_builder = StringBuilder::with_capacity(2, 1024);
858 let mut format3_builder = StringBuilder::with_capacity(2, 1024);
859
860 date_string_builder.append_null();
861 format1_builder.append_null();
862 format2_builder.append_null();
863 format3_builder.append_null();
864
865 date_string_builder.append_value("2020-09-08T13:42:29.19085Z");
866 format1_builder.append_value("%s");
867 format2_builder.append_value("%c");
868 format3_builder.append_value("%H:%M:%S");
869
870 let string_array = [
871 ColumnarValue::Array(Arc::new(date_string_builder.finish()) as ArrayRef),
872 ColumnarValue::Array(Arc::new(format1_builder.finish()) as ArrayRef),
873 ColumnarValue::Array(Arc::new(format2_builder.finish()) as ArrayRef),
874 ColumnarValue::Array(Arc::new(format3_builder.finish()) as ArrayRef),
875 ];
876
877 let expected_err =
878 "Execution error: Error parsing timestamp from '2020-09-08T13:42:29.19085Z' using format '%H:%M:%S': input contains invalid characters";
879 match to_timestamp(&string_array) {
880 Ok(_) => panic!("Expected error but got success"),
881 Err(e) => {
882 assert!(
883 e.to_string().contains(expected_err),
884 "Can not find expected error '{expected_err}'. Actual error '{e}'"
885 );
886 }
887 }
888 Ok(())
889 }
890
891 #[test]
892 fn string_to_timestamp_formatted() {
893 assert_eq!(
895 1599572549190855000,
896 parse_timestamp_formatted("2020-09-08T13:42:29.190855+00:00", "%+").unwrap()
897 );
898 assert_eq!(
899 1599572549190855000,
900 parse_timestamp_formatted("2020-09-08T13:42:29.190855Z", "%+").unwrap()
901 );
902 assert_eq!(
903 1599572549000000000,
904 parse_timestamp_formatted("2020-09-08T13:42:29Z", "%+").unwrap()
905 ); assert_eq!(
907 1599590549190855000,
908 parse_timestamp_formatted("2020-09-08T13:42:29.190855-05:00", "%+").unwrap()
909 );
910 assert_eq!(
911 1599590549000000000,
912 parse_timestamp_formatted("1599590549", "%s").unwrap()
913 );
914 assert_eq!(
915 1599572549000000000,
916 parse_timestamp_formatted("09-08-2020 13/42/29", "%m-%d-%Y %H/%M/%S")
917 .unwrap()
918 );
919 assert_eq!(
920 1642896000000000000,
921 parse_timestamp_formatted("2022-01-23", "%Y-%m-%d").unwrap()
922 );
923 }
924
925 fn parse_timestamp_formatted(s: &str, format: &str) -> Result<i64, DataFusionError> {
926 let result = string_to_timestamp_nanos_formatted(s, format);
927 if let Err(e) = &result {
928 eprintln!("Error parsing timestamp '{s}' using format '{format}': {e:?}");
929 }
930 result
931 }
932
933 #[test]
934 fn string_to_timestamp_formatted_invalid() {
935 let cases = [
937 ("", "%Y%m%d %H%M%S", "premature end of input"),
938 ("SS", "%c", "premature end of input"),
939 ("Wed, 18 Feb 2015 23:16:09 GMT", "", "trailing input"),
940 (
941 "Wed, 18 Feb 2015 23:16:09 GMT",
942 "%XX",
943 "input contains invalid characters",
944 ),
945 (
946 "Wed, 18 Feb 2015 23:16:09 GMT",
947 "%Y%m%d %H%M%S",
948 "input contains invalid characters",
949 ),
950 ];
951
952 for (s, f, ctx) in cases {
953 let expected = format!("Execution error: Error parsing timestamp from '{s}' using format '{f}': {ctx}");
954 let actual = string_to_datetime_formatted(&Utc, s, f)
955 .unwrap_err()
956 .strip_backtrace();
957 assert_eq!(actual, expected)
958 }
959 }
960
961 #[test]
962 fn string_to_timestamp_invalid_arguments() {
963 let cases = [
965 ("", "%Y%m%d %H%M%S", "premature end of input"),
966 ("SS", "%c", "premature end of input"),
967 ("Wed, 18 Feb 2015 23:16:09 GMT", "", "trailing input"),
968 (
969 "Wed, 18 Feb 2015 23:16:09 GMT",
970 "%XX",
971 "input contains invalid characters",
972 ),
973 (
974 "Wed, 18 Feb 2015 23:16:09 GMT",
975 "%Y%m%d %H%M%S",
976 "input contains invalid characters",
977 ),
978 ];
979
980 for (s, f, ctx) in cases {
981 let expected = format!("Execution error: Error parsing timestamp from '{s}' using format '{f}': {ctx}");
982 let actual = string_to_datetime_formatted(&Utc, s, f)
983 .unwrap_err()
984 .strip_backtrace();
985 assert_eq!(actual, expected)
986 }
987 }
988
989 #[test]
990 fn test_tz() {
991 let udfs: Vec<Box<dyn ScalarUDFImpl>> = vec![
992 Box::new(ToTimestampFunc::new()),
993 Box::new(ToTimestampSecondsFunc::new()),
994 Box::new(ToTimestampMillisFunc::new()),
995 Box::new(ToTimestampNanosFunc::new()),
996 Box::new(ToTimestampSecondsFunc::new()),
997 ];
998
999 let mut nanos_builder = TimestampNanosecondArray::builder(2);
1000 let mut millis_builder = TimestampMillisecondArray::builder(2);
1001 let mut micros_builder = TimestampMicrosecondArray::builder(2);
1002 let mut sec_builder = TimestampSecondArray::builder(2);
1003
1004 nanos_builder.append_value(1599572549190850000);
1005 millis_builder.append_value(1599572549190);
1006 micros_builder.append_value(1599572549190850);
1007 sec_builder.append_value(1599572549);
1008
1009 let nanos_timestamps =
1010 Arc::new(nanos_builder.finish().with_timezone("UTC")) as ArrayRef;
1011 let millis_timestamps =
1012 Arc::new(millis_builder.finish().with_timezone("UTC")) as ArrayRef;
1013 let micros_timestamps =
1014 Arc::new(micros_builder.finish().with_timezone("UTC")) as ArrayRef;
1015 let sec_timestamps =
1016 Arc::new(sec_builder.finish().with_timezone("UTC")) as ArrayRef;
1017
1018 let arrays = &[
1019 ColumnarValue::Array(Arc::clone(&nanos_timestamps)),
1020 ColumnarValue::Array(Arc::clone(&millis_timestamps)),
1021 ColumnarValue::Array(Arc::clone(µs_timestamps)),
1022 ColumnarValue::Array(Arc::clone(&sec_timestamps)),
1023 ];
1024
1025 for udf in &udfs {
1026 for array in arrays {
1027 let rt = udf.return_type(&[array.data_type()]).unwrap();
1028 let arg_field = Field::new("arg", array.data_type().clone(), true).into();
1029 assert!(matches!(rt, Timestamp(_, Some(_))));
1030 let args = datafusion_expr::ScalarFunctionArgs {
1031 args: vec![array.clone()],
1032 arg_fields: vec![arg_field],
1033 number_rows: 4,
1034 return_field: Field::new("f", rt, true).into(),
1035 config_options: Arc::new(ConfigOptions::default()),
1036 };
1037 let res = udf
1038 .invoke_with_args(args)
1039 .expect("that to_timestamp parsed values without error");
1040 let array = match res {
1041 ColumnarValue::Array(res) => res,
1042 _ => panic!("Expected a columnar array"),
1043 };
1044 let ty = array.data_type();
1045 assert!(matches!(ty, Timestamp(_, Some(_))));
1046 }
1047 }
1048
1049 let mut nanos_builder = TimestampNanosecondArray::builder(2);
1050 let mut millis_builder = TimestampMillisecondArray::builder(2);
1051 let mut micros_builder = TimestampMicrosecondArray::builder(2);
1052 let mut sec_builder = TimestampSecondArray::builder(2);
1053 let mut i64_builder = Int64Array::builder(2);
1054
1055 nanos_builder.append_value(1599572549190850000);
1056 millis_builder.append_value(1599572549190);
1057 micros_builder.append_value(1599572549190850);
1058 sec_builder.append_value(1599572549);
1059 i64_builder.append_value(1599572549);
1060
1061 let nanos_timestamps = Arc::new(nanos_builder.finish()) as ArrayRef;
1062 let millis_timestamps = Arc::new(millis_builder.finish()) as ArrayRef;
1063 let micros_timestamps = Arc::new(micros_builder.finish()) as ArrayRef;
1064 let sec_timestamps = Arc::new(sec_builder.finish()) as ArrayRef;
1065 let i64_timestamps = Arc::new(i64_builder.finish()) as ArrayRef;
1066
1067 let arrays = &[
1068 ColumnarValue::Array(Arc::clone(&nanos_timestamps)),
1069 ColumnarValue::Array(Arc::clone(&millis_timestamps)),
1070 ColumnarValue::Array(Arc::clone(µs_timestamps)),
1071 ColumnarValue::Array(Arc::clone(&sec_timestamps)),
1072 ColumnarValue::Array(Arc::clone(&i64_timestamps)),
1073 ];
1074
1075 for udf in &udfs {
1076 for array in arrays {
1077 let rt = udf.return_type(&[array.data_type()]).unwrap();
1078 assert!(matches!(rt, Timestamp(_, None)));
1079 let arg_field = Field::new("arg", array.data_type().clone(), true).into();
1080 let args = datafusion_expr::ScalarFunctionArgs {
1081 args: vec![array.clone()],
1082 arg_fields: vec![arg_field],
1083 number_rows: 5,
1084 return_field: Field::new("f", rt, true).into(),
1085 config_options: Arc::new(ConfigOptions::default()),
1086 };
1087 let res = udf
1088 .invoke_with_args(args)
1089 .expect("that to_timestamp parsed values without error");
1090 let array = match res {
1091 ColumnarValue::Array(res) => res,
1092 _ => panic!("Expected a columnar array"),
1093 };
1094 let ty = array.data_type();
1095 assert!(matches!(ty, Timestamp(_, None)));
1096 }
1097 }
1098 }
1099
1100 #[test]
1101 fn test_to_timestamp_arg_validation() {
1102 let mut date_string_builder = StringBuilder::with_capacity(2, 1024);
1103 date_string_builder.append_value("2020-09-08T13:42:29.19085Z");
1104
1105 let data = date_string_builder.finish();
1106
1107 let funcs: Vec<(ScalarFunctionImplementation, TimeUnit)> = vec![
1108 (Arc::new(to_timestamp), Nanosecond),
1109 (Arc::new(to_timestamp_micros), Microsecond),
1110 (Arc::new(to_timestamp_millis), Millisecond),
1111 (Arc::new(to_timestamp_nanos), Nanosecond),
1112 (Arc::new(to_timestamp_seconds), Second),
1113 ];
1114
1115 let mut nanos_builder = TimestampNanosecondArray::builder(2);
1116 let mut millis_builder = TimestampMillisecondArray::builder(2);
1117 let mut micros_builder = TimestampMicrosecondArray::builder(2);
1118 let mut sec_builder = TimestampSecondArray::builder(2);
1119
1120 nanos_builder.append_value(1599572549190850000);
1121 millis_builder.append_value(1599572549190);
1122 micros_builder.append_value(1599572549190850);
1123 sec_builder.append_value(1599572549);
1124
1125 let nanos_expected_timestamps = &nanos_builder.finish() as &dyn Array;
1126 let millis_expected_timestamps = &millis_builder.finish() as &dyn Array;
1127 let micros_expected_timestamps = µs_builder.finish() as &dyn Array;
1128 let sec_expected_timestamps = &sec_builder.finish() as &dyn Array;
1129
1130 for (func, time_unit) in funcs {
1131 let string_array = [
1133 ColumnarValue::Array(Arc::new(data.clone()) as ArrayRef),
1134 ColumnarValue::Scalar(ScalarValue::Utf8(Some("%s".to_string()))),
1135 ColumnarValue::Scalar(ScalarValue::Utf8(Some("%c".to_string()))),
1136 ColumnarValue::Scalar(ScalarValue::Utf8(Some("%+".to_string()))),
1137 ];
1138 let parsed_timestamps = func(&string_array)
1139 .expect("that to_timestamp with format args parsed values without error");
1140 if let ColumnarValue::Array(parsed_array) = parsed_timestamps {
1141 assert_eq!(parsed_array.len(), 1);
1142 match time_unit {
1143 Nanosecond => {
1144 assert_eq!(nanos_expected_timestamps, parsed_array.as_ref())
1145 }
1146 Millisecond => {
1147 assert_eq!(millis_expected_timestamps, parsed_array.as_ref())
1148 }
1149 Microsecond => {
1150 assert_eq!(micros_expected_timestamps, parsed_array.as_ref())
1151 }
1152 Second => {
1153 assert_eq!(sec_expected_timestamps, parsed_array.as_ref())
1154 }
1155 };
1156 } else {
1157 panic!("Expected a columnar array")
1158 }
1159
1160 let string_array = [
1162 ColumnarValue::Array(Arc::new(data.clone()) as ArrayRef),
1163 ColumnarValue::Scalar(ScalarValue::LargeUtf8(Some("%s".to_string()))),
1164 ColumnarValue::Scalar(ScalarValue::LargeUtf8(Some("%c".to_string()))),
1165 ColumnarValue::Scalar(ScalarValue::LargeUtf8(Some("%+".to_string()))),
1166 ];
1167 let parsed_timestamps = func(&string_array)
1168 .expect("that to_timestamp with format args parsed values without error");
1169 if let ColumnarValue::Array(parsed_array) = parsed_timestamps {
1170 assert_eq!(parsed_array.len(), 1);
1171 assert!(matches!(parsed_array.data_type(), Timestamp(_, None)));
1172
1173 match time_unit {
1174 Nanosecond => {
1175 assert_eq!(nanos_expected_timestamps, parsed_array.as_ref())
1176 }
1177 Millisecond => {
1178 assert_eq!(millis_expected_timestamps, parsed_array.as_ref())
1179 }
1180 Microsecond => {
1181 assert_eq!(micros_expected_timestamps, parsed_array.as_ref())
1182 }
1183 Second => {
1184 assert_eq!(sec_expected_timestamps, parsed_array.as_ref())
1185 }
1186 };
1187 } else {
1188 panic!("Expected a columnar array")
1189 }
1190
1191 let string_array = [
1193 ColumnarValue::Array(Arc::new(data.clone()) as ArrayRef),
1194 ColumnarValue::Scalar(ScalarValue::Int32(Some(1))),
1195 ColumnarValue::Scalar(ScalarValue::Int32(Some(2))),
1196 ColumnarValue::Scalar(ScalarValue::Int32(Some(3))),
1197 ];
1198
1199 let expected = "Unsupported data type Int32 for function".to_string();
1200 let actual = func(&string_array).unwrap_err().to_string();
1201 assert_contains!(actual, expected);
1202
1203 let string_array = [
1205 ColumnarValue::Array(Arc::new(data.clone()) as ArrayRef),
1206 ColumnarValue::Array(Arc::new(PrimitiveArray::<Int64Type>::new(
1207 vec![1i64].into(),
1208 None,
1209 )) as ArrayRef),
1210 ];
1211
1212 let expected = "Unsupported data type".to_string();
1213 let actual = func(&string_array).unwrap_err().to_string();
1214 assert_contains!(actual, expected);
1215 }
1216 }
1217}