datafusion_functions/datetime/
to_timestamp.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use std::any::Any;
19use std::sync::Arc;
20
21use crate::datetime::common::*;
22use arrow::array::Float64Array;
23use arrow::array::timezone::Tz;
24use arrow::datatypes::DataType::*;
25use arrow::datatypes::TimeUnit::{Microsecond, Millisecond, Nanosecond, Second};
26use arrow::datatypes::{
27    ArrowTimestampType, DataType, TimestampMicrosecondType, TimestampMillisecondType,
28    TimestampNanosecondType, TimestampSecondType,
29};
30use datafusion_common::config::ConfigOptions;
31use datafusion_common::format::DEFAULT_CAST_OPTIONS;
32use datafusion_common::{Result, ScalarType, ScalarValue, exec_err};
33use datafusion_expr::{
34    ColumnarValue, Documentation, ScalarUDF, ScalarUDFImpl, Signature, Volatility,
35};
36use datafusion_macros::user_doc;
37
38#[user_doc(
39    doc_section(label = "Time and Date Functions"),
40    description = r#"
41Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000000<TZ>`) in the session time zone. Supports strings,
42integer, unsigned integer, and double types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00')
43if no [Chrono formats] are provided. Strings that parse without a time zone are treated as if they are in the
44session time zone, or UTC if no session time zone is set.
45Integers, unsigned integers, and doubles are interpreted as seconds since the unix epoch (`1970-01-01T00:00:00Z`).
46
47Note: `to_timestamp` returns `Timestamp(ns, TimeZone)` where the time zone is the session time zone. The supported range
48for integer input is between`-9223372037` and `9223372036`. Supported range for string input is between
49`1677-09-21T00:12:44.0` and `2262-04-11T23:47:16.0`. Please use `to_timestamp_seconds`
50for the input outside of supported bounds.
51
52The session time zone can be set using the statement `SET TIMEZONE = 'desired time zone'`.
53The time zone can be a value like +00:00, 'Europe/London' etc.
54"#,
55    syntax_example = "to_timestamp(expression[, ..., format_n])",
56    sql_example = r#"```sql
57> select to_timestamp('2023-01-31T09:26:56.123456789-05:00');
58+-----------------------------------------------------------+
59| to_timestamp(Utf8("2023-01-31T09:26:56.123456789-05:00")) |
60+-----------------------------------------------------------+
61| 2023-01-31T14:26:56.123456789                             |
62+-----------------------------------------------------------+
63> select to_timestamp('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y');
64+--------------------------------------------------------------------------------------------------------+
65| to_timestamp(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) |
66+--------------------------------------------------------------------------------------------------------+
67| 2023-05-17T03:59:00.123456789                                                                          |
68+--------------------------------------------------------------------------------------------------------+
69```
70Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/builtin_functions/date_time.rs)
71"#,
72    argument(
73        name = "expression",
74        description = "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators."
75    ),
76    argument(
77        name = "format_n",
78        description = r#"
79Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression.
80Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully
81parse the expression an error will be returned. Note: parsing of named timezones (e.g. 'America/New_York') using %Z is
82only supported at the end of the string preceded by a space.
83"#
84    )
85)]
86#[derive(Debug, PartialEq, Eq, Hash)]
87pub struct ToTimestampFunc {
88    signature: Signature,
89    timezone: Option<Arc<str>>,
90}
91
92#[user_doc(
93    doc_section(label = "Time and Date Functions"),
94    description = r#"
95Converts a value to a timestamp (`YYYY-MM-DDT00:00:00<TZ>`) in the session time zone. Supports strings,
96integer, unsigned integer, and double types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00')
97if no [Chrono formats] are provided. Strings that parse without a time zone are treated as if they are in the
98session time zone, or UTC if no session time zone is set.
99Integers, unsigned integers, and doubles are interpreted as seconds since the unix epoch (`1970-01-01T00:00:00Z`).
100
101The session time zone can be set using the statement `SET TIMEZONE = 'desired time zone'`.
102The time zone can be a value like +00:00, 'Europe/London' etc.
103"#,
104    syntax_example = "to_timestamp_seconds(expression[, ..., format_n])",
105    sql_example = r#"```sql
106> select to_timestamp_seconds('2023-01-31T09:26:56.123456789-05:00');
107+-------------------------------------------------------------------+
108| to_timestamp_seconds(Utf8("2023-01-31T09:26:56.123456789-05:00")) |
109+-------------------------------------------------------------------+
110| 2023-01-31T14:26:56                                               |
111+-------------------------------------------------------------------+
112> select to_timestamp_seconds('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y');
113+----------------------------------------------------------------------------------------------------------------+
114| to_timestamp_seconds(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) |
115+----------------------------------------------------------------------------------------------------------------+
116| 2023-05-17T03:59:00                                                                                            |
117+----------------------------------------------------------------------------------------------------------------+
118```
119Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/builtin_functions/date_time.rs)
120"#,
121    argument(
122        name = "expression",
123        description = "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators."
124    ),
125    argument(
126        name = "format_n",
127        description = r#"
128Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression.
129Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully
130parse the expression an error will be returned. Note: parsing of named timezones (e.g. 'America/New_York') using %Z is
131only supported at the end of the string preceded by a space.
132"#
133    )
134)]
135#[derive(Debug, PartialEq, Eq, Hash)]
136pub struct ToTimestampSecondsFunc {
137    signature: Signature,
138    timezone: Option<Arc<str>>,
139}
140
141#[user_doc(
142    doc_section(label = "Time and Date Functions"),
143    description = r#"
144Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000<TZ>`) in the session time zone. Supports strings,
145integer, unsigned integer, and double types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00')
146if no [Chrono formats] are provided. Strings that parse without a time zone are treated as if they are in the
147session time zone, or UTC if no session time zone is set.
148Integers, unsigned integers, and doubles are interpreted as milliseconds since the unix epoch (`1970-01-01T00:00:00Z`).
149
150The session time zone can be set using the statement `SET TIMEZONE = 'desired time zone'`.
151The time zone can be a value like +00:00, 'Europe/London' etc.
152"#,
153    syntax_example = "to_timestamp_millis(expression[, ..., format_n])",
154    sql_example = r#"```sql
155> select to_timestamp_millis('2023-01-31T09:26:56.123456789-05:00');
156+------------------------------------------------------------------+
157| to_timestamp_millis(Utf8("2023-01-31T09:26:56.123456789-05:00")) |
158+------------------------------------------------------------------+
159| 2023-01-31T14:26:56.123                                          |
160+------------------------------------------------------------------+
161> select to_timestamp_millis('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y');
162+---------------------------------------------------------------------------------------------------------------+
163| to_timestamp_millis(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) |
164+---------------------------------------------------------------------------------------------------------------+
165| 2023-05-17T03:59:00.123                                                                                       |
166+---------------------------------------------------------------------------------------------------------------+
167```
168Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/builtin_functions/date_time.rs)
169"#,
170    argument(
171        name = "expression",
172        description = "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators."
173    ),
174    argument(
175        name = "format_n",
176        description = r#"
177Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression.
178Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully
179parse the expression an error will be returned. Note: parsing of named timezones (e.g. 'America/New_York') using %Z is
180only supported at the end of the string preceded by a space.
181"#
182    )
183)]
184#[derive(Debug, PartialEq, Eq, Hash)]
185pub struct ToTimestampMillisFunc {
186    signature: Signature,
187    timezone: Option<Arc<str>>,
188}
189
190#[user_doc(
191    doc_section(label = "Time and Date Functions"),
192    description = r#"
193Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000000<TZ>`) in the session time zone. Supports strings,
194integer, unsigned integer, and double types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00')
195if no [Chrono formats] are provided. Strings that parse without a time zone are treated as if they are in the
196session time zone, or UTC if no session time zone is set.
197Integers, unsigned integers, and doubles are interpreted as microseconds since the unix epoch (`1970-01-01T00:00:00Z`).
198
199The session time zone can be set using the statement `SET TIMEZONE = 'desired time zone'`.
200The time zone can be a value like +00:00, 'Europe/London' etc.
201"#,
202    syntax_example = "to_timestamp_micros(expression[, ..., format_n])",
203    sql_example = r#"```sql
204> select to_timestamp_micros('2023-01-31T09:26:56.123456789-05:00');
205+------------------------------------------------------------------+
206| to_timestamp_micros(Utf8("2023-01-31T09:26:56.123456789-05:00")) |
207+------------------------------------------------------------------+
208| 2023-01-31T14:26:56.123456                                       |
209+------------------------------------------------------------------+
210> select to_timestamp_micros('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y');
211+---------------------------------------------------------------------------------------------------------------+
212| to_timestamp_micros(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) |
213+---------------------------------------------------------------------------------------------------------------+
214| 2023-05-17T03:59:00.123456                                                                                    |
215+---------------------------------------------------------------------------------------------------------------+
216```
217Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/builtin_functions/date_time.rs)
218"#,
219    argument(
220        name = "expression",
221        description = "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators."
222    ),
223    argument(
224        name = "format_n",
225        description = r#"
226Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression.
227Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully
228parse the expression an error will be returned. Note: parsing of named timezones (e.g. 'America/New_York') using %Z is
229only supported at the end of the string preceded by a space.
230"#
231    )
232)]
233#[derive(Debug, PartialEq, Eq, Hash)]
234pub struct ToTimestampMicrosFunc {
235    signature: Signature,
236    timezone: Option<Arc<str>>,
237}
238
239#[user_doc(
240    doc_section(label = "Time and Date Functions"),
241    description = r#"
242Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000000000<TZ>`) in the session time zone. Supports strings,
243integer, unsigned integer, and double types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00')
244if no [Chrono formats] are provided. Strings that parse without a time zone are treated as if they are in the
245session time zone. Integers, unsigned integers, and doubles are interpreted as nanoseconds since the unix epoch (`1970-01-01T00:00:00Z`).
246
247The session time zone can be set using the statement `SET TIMEZONE = 'desired time zone'`.
248The time zone can be a value like +00:00, 'Europe/London' etc.
249"#,
250    syntax_example = "to_timestamp_nanos(expression[, ..., format_n])",
251    sql_example = r#"```sql
252> select to_timestamp_nanos('2023-01-31T09:26:56.123456789-05:00');
253+-----------------------------------------------------------------+
254| to_timestamp_nanos(Utf8("2023-01-31T09:26:56.123456789-05:00")) |
255+-----------------------------------------------------------------+
256| 2023-01-31T14:26:56.123456789                                   |
257+-----------------------------------------------------------------+
258> select to_timestamp_nanos('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y');
259+--------------------------------------------------------------------------------------------------------------+
260| to_timestamp_nanos(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) |
261+--------------------------------------------------------------------------------------------------------------+
262| 2023-05-17T03:59:00.123456789                                                                                |
263+---------------------------------------------------------------------------------------------------------------+
264```
265Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/builtin_functions/date_time.rs)
266"#,
267    argument(
268        name = "expression",
269        description = "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators."
270    ),
271    argument(
272        name = "format_n",
273        description = r#"
274Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression.
275Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully
276parse the expression an error will be returned. Note: parsing of named timezones (e.g. 'America/New_York') using %Z is
277only supported at the end of the string preceded by a space.
278"#
279    )
280)]
281#[derive(Debug, PartialEq, Eq, Hash)]
282pub struct ToTimestampNanosFunc {
283    signature: Signature,
284    timezone: Option<Arc<str>>,
285}
286
287/// Macro to generate boilerplate constructors and config methods for ToTimestamp* functions.
288/// Generates: Default impl, deprecated new(), new_with_config(), and extracts timezone from ConfigOptions.
289macro_rules! impl_to_timestamp_constructors {
290    ($func:ty) => {
291        impl Default for $func {
292            fn default() -> Self {
293                Self::new_with_config(&ConfigOptions::default())
294            }
295        }
296
297        impl $func {
298            #[deprecated(since = "52.0.0", note = "use `new_with_config` instead")]
299            /// Deprecated constructor retained for backwards compatibility.
300            ///
301            /// Prefer `new_with_config` which allows specifying the
302            /// timezone via [`ConfigOptions`]. This helper now mirrors the
303            /// canonical default offset (None) provided by `ConfigOptions::default()`.
304            pub fn new() -> Self {
305                Self::new_with_config(&ConfigOptions::default())
306            }
307
308            pub fn new_with_config(config: &ConfigOptions) -> Self {
309                Self {
310                    signature: Signature::variadic_any(Volatility::Immutable),
311                    timezone: config
312                        .execution
313                        .time_zone
314                        .as_ref()
315                        .map(|tz| Arc::from(tz.as_str())),
316                }
317            }
318        }
319    };
320}
321
322impl_to_timestamp_constructors!(ToTimestampFunc);
323impl_to_timestamp_constructors!(ToTimestampSecondsFunc);
324impl_to_timestamp_constructors!(ToTimestampMillisFunc);
325impl_to_timestamp_constructors!(ToTimestampMicrosFunc);
326impl_to_timestamp_constructors!(ToTimestampNanosFunc);
327
328/// to_timestamp SQL function
329///
330/// Note: `to_timestamp` returns `Timestamp(Nanosecond)` though its arguments are interpreted as **seconds**.
331/// The supported range for integer input is between `-9223372037` and `9223372036`.
332/// Supported range for string input is between `1677-09-21T00:12:44.0` and `2262-04-11T23:47:16.0`.
333/// Please use `to_timestamp_seconds` for the input outside of supported bounds.
334/// Macro to generate the with_updated_config method for ToTimestamp* functions.
335macro_rules! impl_with_updated_config {
336    () => {
337        fn with_updated_config(&self, config: &ConfigOptions) -> Option<ScalarUDF> {
338            Some(Self::new_with_config(config).into())
339        }
340    };
341}
342
343impl ScalarUDFImpl for ToTimestampFunc {
344    fn as_any(&self) -> &dyn Any {
345        self
346    }
347
348    fn name(&self) -> &str {
349        "to_timestamp"
350    }
351
352    fn signature(&self) -> &Signature {
353        &self.signature
354    }
355
356    fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
357        Ok(Timestamp(Nanosecond, self.timezone.clone()))
358    }
359
360    impl_with_updated_config!();
361
362    fn invoke_with_args(
363        &self,
364        args: datafusion_expr::ScalarFunctionArgs,
365    ) -> Result<ColumnarValue> {
366        let datafusion_expr::ScalarFunctionArgs { args, .. } = args;
367
368        if args.is_empty() {
369            return exec_err!(
370                "to_timestamp function requires 1 or more arguments, got {}",
371                args.len()
372            );
373        }
374
375        // validate that any args after the first one are Utf8
376        if args.len() > 1 {
377            validate_data_types(&args, "to_timestamp")?;
378        }
379
380        let tz = self.timezone.clone();
381
382        match args[0].data_type() {
383            Int32 | Int64 => args[0]
384                .cast_to(&Timestamp(Second, None), None)?
385                .cast_to(&Timestamp(Nanosecond, tz), None),
386            Null | Timestamp(_, _) => args[0].cast_to(&Timestamp(Nanosecond, tz), None),
387            Float64 => {
388                let rescaled = arrow::compute::kernels::numeric::mul(
389                    &args[0].to_array(1)?,
390                    &arrow::array::Scalar::new(Float64Array::from(vec![
391                        1_000_000_000f64,
392                    ])),
393                )?;
394                Ok(ColumnarValue::Array(arrow::compute::cast_with_options(
395                    &rescaled,
396                    &Timestamp(Nanosecond, tz),
397                    &DEFAULT_CAST_OPTIONS,
398                )?))
399            }
400            Utf8View | LargeUtf8 | Utf8 => {
401                to_timestamp_impl::<TimestampNanosecondType>(&args, "to_timestamp", &tz)
402            }
403            Decimal128(_, _) => {
404                match &args[0] {
405                    ColumnarValue::Scalar(ScalarValue::Decimal128(
406                        Some(value),
407                        _,
408                        scale,
409                    )) => {
410                        // Convert decimal to seconds and nanoseconds
411                        let scale_factor = 10_i128.pow(*scale as u32);
412                        let seconds = value / scale_factor;
413                        let fraction = value % scale_factor;
414                        let nanos = (fraction * 1_000_000_000) / scale_factor;
415                        let timestamp_nanos = seconds * 1_000_000_000 + nanos;
416
417                        Ok(ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(
418                            Some(timestamp_nanos as i64),
419                            tz,
420                        )))
421                    }
422                    _ => exec_err!("Invalid decimal value"),
423                }
424            }
425            other => {
426                exec_err!("Unsupported data type {other} for function to_timestamp")
427            }
428        }
429    }
430
431    fn documentation(&self) -> Option<&Documentation> {
432        self.doc()
433    }
434}
435
436impl ScalarUDFImpl for ToTimestampSecondsFunc {
437    fn as_any(&self) -> &dyn Any {
438        self
439    }
440
441    fn name(&self) -> &str {
442        "to_timestamp_seconds"
443    }
444
445    fn signature(&self) -> &Signature {
446        &self.signature
447    }
448
449    fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
450        Ok(Timestamp(Second, self.timezone.clone()))
451    }
452
453    impl_with_updated_config!();
454
455    fn invoke_with_args(
456        &self,
457        args: datafusion_expr::ScalarFunctionArgs,
458    ) -> Result<ColumnarValue> {
459        let datafusion_expr::ScalarFunctionArgs { args, .. } = args;
460
461        if args.is_empty() {
462            return exec_err!(
463                "to_timestamp_seconds function requires 1 or more arguments, got {}",
464                args.len()
465            );
466        }
467
468        // validate that any args after the first one are Utf8
469        if args.len() > 1 {
470            validate_data_types(&args, "to_timestamp")?;
471        }
472
473        let tz = self.timezone.clone();
474
475        match args[0].data_type() {
476            Null | Int32 | Int64 | Timestamp(_, _) | Decimal128(_, _) => {
477                args[0].cast_to(&Timestamp(Second, tz), None)
478            }
479            Utf8View | LargeUtf8 | Utf8 => to_timestamp_impl::<TimestampSecondType>(
480                &args,
481                "to_timestamp_seconds",
482                &self.timezone,
483            ),
484            other => {
485                exec_err!(
486                    "Unsupported data type {} for function to_timestamp_seconds",
487                    other
488                )
489            }
490        }
491    }
492
493    fn documentation(&self) -> Option<&Documentation> {
494        self.doc()
495    }
496}
497
498impl ScalarUDFImpl for ToTimestampMillisFunc {
499    fn as_any(&self) -> &dyn Any {
500        self
501    }
502
503    fn name(&self) -> &str {
504        "to_timestamp_millis"
505    }
506
507    fn signature(&self) -> &Signature {
508        &self.signature
509    }
510
511    fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
512        Ok(Timestamp(Millisecond, self.timezone.clone()))
513    }
514
515    impl_with_updated_config!();
516
517    fn invoke_with_args(
518        &self,
519        args: datafusion_expr::ScalarFunctionArgs,
520    ) -> Result<ColumnarValue> {
521        let datafusion_expr::ScalarFunctionArgs { args, .. } = args;
522
523        if args.is_empty() {
524            return exec_err!(
525                "to_timestamp_millis function requires 1 or more arguments, got {}",
526                args.len()
527            );
528        }
529
530        // validate that any args after the first one are Utf8
531        if args.len() > 1 {
532            validate_data_types(&args, "to_timestamp")?;
533        }
534
535        match args[0].data_type() {
536            Null | Int32 | Int64 | Timestamp(_, _) => {
537                args[0].cast_to(&Timestamp(Millisecond, self.timezone.clone()), None)
538            }
539            Utf8View | LargeUtf8 | Utf8 => to_timestamp_impl::<TimestampMillisecondType>(
540                &args,
541                "to_timestamp_millis",
542                &self.timezone,
543            ),
544            other => {
545                exec_err!(
546                    "Unsupported data type {} for function to_timestamp_millis",
547                    other
548                )
549            }
550        }
551    }
552
553    fn documentation(&self) -> Option<&Documentation> {
554        self.doc()
555    }
556}
557
558impl ScalarUDFImpl for ToTimestampMicrosFunc {
559    fn as_any(&self) -> &dyn Any {
560        self
561    }
562
563    fn name(&self) -> &str {
564        "to_timestamp_micros"
565    }
566
567    fn signature(&self) -> &Signature {
568        &self.signature
569    }
570
571    fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
572        Ok(Timestamp(Microsecond, self.timezone.clone()))
573    }
574
575    impl_with_updated_config!();
576
577    fn invoke_with_args(
578        &self,
579        args: datafusion_expr::ScalarFunctionArgs,
580    ) -> Result<ColumnarValue> {
581        let datafusion_expr::ScalarFunctionArgs { args, .. } = args;
582
583        if args.is_empty() {
584            return exec_err!(
585                "to_timestamp_micros function requires 1 or more arguments, got {}",
586                args.len()
587            );
588        }
589
590        // validate that any args after the first one are Utf8
591        if args.len() > 1 {
592            validate_data_types(&args, "to_timestamp")?;
593        }
594
595        match args[0].data_type() {
596            Null | Int32 | Int64 | Timestamp(_, _) => {
597                args[0].cast_to(&Timestamp(Microsecond, self.timezone.clone()), None)
598            }
599            Utf8View | LargeUtf8 | Utf8 => to_timestamp_impl::<TimestampMicrosecondType>(
600                &args,
601                "to_timestamp_micros",
602                &self.timezone,
603            ),
604            other => {
605                exec_err!(
606                    "Unsupported data type {} for function to_timestamp_micros",
607                    other
608                )
609            }
610        }
611    }
612
613    fn documentation(&self) -> Option<&Documentation> {
614        self.doc()
615    }
616}
617
618impl ScalarUDFImpl for ToTimestampNanosFunc {
619    fn as_any(&self) -> &dyn Any {
620        self
621    }
622
623    fn name(&self) -> &str {
624        "to_timestamp_nanos"
625    }
626
627    fn signature(&self) -> &Signature {
628        &self.signature
629    }
630
631    fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
632        Ok(Timestamp(Nanosecond, self.timezone.clone()))
633    }
634
635    impl_with_updated_config!();
636
637    fn invoke_with_args(
638        &self,
639        args: datafusion_expr::ScalarFunctionArgs,
640    ) -> Result<ColumnarValue> {
641        let datafusion_expr::ScalarFunctionArgs { args, .. } = args;
642
643        if args.is_empty() {
644            return exec_err!(
645                "to_timestamp_nanos function requires 1 or more arguments, got {}",
646                args.len()
647            );
648        }
649
650        // validate that any args after the first one are Utf8
651        if args.len() > 1 {
652            validate_data_types(&args, "to_timestamp")?;
653        }
654
655        match args[0].data_type() {
656            Null | Int32 | Int64 | Timestamp(_, _) => {
657                args[0].cast_to(&Timestamp(Nanosecond, self.timezone.clone()), None)
658            }
659            Utf8View | LargeUtf8 | Utf8 => to_timestamp_impl::<TimestampNanosecondType>(
660                &args,
661                "to_timestamp_nanos",
662                &self.timezone,
663            ),
664            other => {
665                exec_err!(
666                    "Unsupported data type {} for function to_timestamp_nanos",
667                    other
668                )
669            }
670        }
671    }
672
673    fn documentation(&self) -> Option<&Documentation> {
674        self.doc()
675    }
676}
677
678fn to_timestamp_impl<T: ArrowTimestampType + ScalarType<i64>>(
679    args: &[ColumnarValue],
680    name: &str,
681    timezone: &Option<Arc<str>>,
682) -> Result<ColumnarValue> {
683    let factor = match T::UNIT {
684        Second => 1_000_000_000,
685        Millisecond => 1_000_000,
686        Microsecond => 1_000,
687        Nanosecond => 1,
688    };
689
690    let tz = match timezone.clone() {
691        Some(tz) => Some(tz.parse::<Tz>()?),
692        None => None,
693    };
694
695    match args.len() {
696        1 => handle::<T, _>(
697            args,
698            move |s| string_to_timestamp_nanos_with_timezone(&tz, s).map(|n| n / factor),
699            name,
700            &Timestamp(T::UNIT, timezone.clone()),
701        ),
702        n if n >= 2 => handle_multiple::<T, _, _>(
703            args,
704            move |s, format| {
705                string_to_timestamp_nanos_formatted_with_timezone(&tz, s, format)
706            },
707            |n| n / factor,
708            name,
709            &Timestamp(T::UNIT, timezone.clone()),
710        ),
711        _ => exec_err!("Unsupported 0 argument count for function {name}"),
712    }
713}
714
715#[cfg(test)]
716mod tests {
717    use std::sync::Arc;
718
719    use arrow::array::types::Int64Type;
720    use arrow::array::{
721        Array, PrimitiveArray, TimestampMicrosecondArray, TimestampMillisecondArray,
722        TimestampNanosecondArray, TimestampSecondArray,
723    };
724    use arrow::array::{ArrayRef, Int64Array, StringBuilder};
725    use arrow::datatypes::{Field, TimeUnit};
726    use chrono::{DateTime, FixedOffset, Utc};
727    use datafusion_common::config::ConfigOptions;
728    use datafusion_common::{DataFusionError, ScalarValue, assert_contains};
729    use datafusion_expr::{ScalarFunctionArgs, ScalarFunctionImplementation};
730
731    use super::*;
732
733    fn to_timestamp(args: &[ColumnarValue]) -> Result<ColumnarValue> {
734        let timezone: Option<Arc<str>> = Some("UTC".into());
735        to_timestamp_impl::<TimestampNanosecondType>(args, "to_timestamp", &timezone)
736    }
737
738    /// to_timestamp_millis SQL function
739    fn to_timestamp_millis(args: &[ColumnarValue]) -> Result<ColumnarValue> {
740        let timezone: Option<Arc<str>> = Some("UTC".into());
741        to_timestamp_impl::<TimestampMillisecondType>(
742            args,
743            "to_timestamp_millis",
744            &timezone,
745        )
746    }
747
748    /// to_timestamp_micros SQL function
749    fn to_timestamp_micros(args: &[ColumnarValue]) -> Result<ColumnarValue> {
750        let timezone: Option<Arc<str>> = Some("UTC".into());
751        to_timestamp_impl::<TimestampMicrosecondType>(
752            args,
753            "to_timestamp_micros",
754            &timezone,
755        )
756    }
757
758    /// to_timestamp_nanos SQL function
759    fn to_timestamp_nanos(args: &[ColumnarValue]) -> Result<ColumnarValue> {
760        let timezone: Option<Arc<str>> = Some("UTC".into());
761        to_timestamp_impl::<TimestampNanosecondType>(
762            args,
763            "to_timestamp_nanos",
764            &timezone,
765        )
766    }
767
768    /// to_timestamp_seconds SQL function
769    fn to_timestamp_seconds(args: &[ColumnarValue]) -> Result<ColumnarValue> {
770        let timezone: Option<Arc<str>> = Some("UTC".into());
771        to_timestamp_impl::<TimestampSecondType>(args, "to_timestamp_seconds", &timezone)
772    }
773
774    fn udfs_and_timeunit() -> Vec<(Box<dyn ScalarUDFImpl>, TimeUnit)> {
775        let udfs: Vec<(Box<dyn ScalarUDFImpl>, TimeUnit)> = vec![
776            (
777                Box::new(ToTimestampFunc::new_with_config(&ConfigOptions::default())),
778                Nanosecond,
779            ),
780            (
781                Box::new(ToTimestampSecondsFunc::new_with_config(
782                    &ConfigOptions::default(),
783                )),
784                Second,
785            ),
786            (
787                Box::new(ToTimestampMillisFunc::new_with_config(
788                    &ConfigOptions::default(),
789                )),
790                Millisecond,
791            ),
792            (
793                Box::new(ToTimestampMicrosFunc::new_with_config(
794                    &ConfigOptions::default(),
795                )),
796                Microsecond,
797            ),
798            (
799                Box::new(ToTimestampNanosFunc::new_with_config(
800                    &ConfigOptions::default(),
801                )),
802                Nanosecond,
803            ),
804        ];
805        udfs
806    }
807
808    fn validate_expected_error(
809        options: &mut ConfigOptions,
810        args: ScalarFunctionArgs,
811        expected_err: &str,
812    ) {
813        let udfs = udfs_and_timeunit();
814
815        for (udf, _) in udfs {
816            match udf
817                .with_updated_config(options)
818                .unwrap()
819                .invoke_with_args(args.clone())
820            {
821                Ok(_) => panic!("Expected error but got success"),
822                Err(e) => {
823                    assert!(
824                        e.to_string().contains(expected_err),
825                        "Can not find expected error '{expected_err}'. Actual error '{e}'"
826                    );
827                }
828            }
829        }
830    }
831
832    #[test]
833    fn to_timestamp_arrays_and_nulls() -> Result<()> {
834        // ensure that arrow array implementation is wired up and handles nulls correctly
835
836        let mut string_builder = StringBuilder::with_capacity(2, 1024);
837        let mut ts_builder = TimestampNanosecondArray::builder(2);
838
839        string_builder.append_value("2020-09-08T13:42:29.190855");
840        ts_builder.append_value(1599572549190855000);
841
842        string_builder.append_null();
843        ts_builder.append_null();
844        let expected_timestamps = &ts_builder.finish() as &dyn Array;
845
846        let string_array =
847            ColumnarValue::Array(Arc::new(string_builder.finish()) as ArrayRef);
848        let parsed_timestamps = to_timestamp(&[string_array])
849            .expect("that to_timestamp parsed values without error");
850        if let ColumnarValue::Array(parsed_array) = parsed_timestamps {
851            assert_eq!(parsed_array.len(), 2);
852            assert_eq!(expected_timestamps, parsed_array.as_ref());
853        } else {
854            panic!("Expected a columnar array")
855        }
856        Ok(())
857    }
858
859    #[test]
860    fn to_timestamp_with_formats_arrays_and_nulls() -> Result<()> {
861        // ensure that arrow array implementation is wired up and handles nulls correctly
862
863        let mut date_string_builder = StringBuilder::with_capacity(2, 1024);
864        let mut format1_builder = StringBuilder::with_capacity(2, 1024);
865        let mut format2_builder = StringBuilder::with_capacity(2, 1024);
866        let mut format3_builder = StringBuilder::with_capacity(2, 1024);
867        let mut ts_builder = TimestampNanosecondArray::builder(2);
868
869        date_string_builder.append_null();
870        format1_builder.append_null();
871        format2_builder.append_null();
872        format3_builder.append_null();
873        ts_builder.append_null();
874
875        date_string_builder.append_value("2020-09-08T13:42:29.19085Z");
876        format1_builder.append_value("%s");
877        format2_builder.append_value("%c");
878        format3_builder.append_value("%+");
879        ts_builder.append_value(1599572549190850000);
880
881        let expected_timestamps = &ts_builder.finish() as &dyn Array;
882
883        let string_array = [
884            ColumnarValue::Array(Arc::new(date_string_builder.finish()) as ArrayRef),
885            ColumnarValue::Array(Arc::new(format1_builder.finish()) as ArrayRef),
886            ColumnarValue::Array(Arc::new(format2_builder.finish()) as ArrayRef),
887            ColumnarValue::Array(Arc::new(format3_builder.finish()) as ArrayRef),
888        ];
889        let parsed_timestamps = to_timestamp(&string_array)
890            .expect("that to_timestamp with format args parsed values without error");
891        if let ColumnarValue::Array(parsed_array) = parsed_timestamps {
892            assert_eq!(parsed_array.len(), 2);
893            assert_eq!(expected_timestamps, parsed_array.as_ref());
894        } else {
895            panic!("Expected a columnar array")
896        }
897        Ok(())
898    }
899
900    #[test]
901    fn to_timestamp_respects_execution_timezone() -> Result<()> {
902        let udfs = udfs_and_timeunit();
903
904        let mut options = ConfigOptions::default();
905        options.execution.time_zone = Some("-05:00".to_string());
906
907        let time_zone: Option<Arc<str>> = options
908            .execution
909            .time_zone
910            .as_ref()
911            .map(|tz| Arc::from(tz.as_str()));
912
913        for (udf, time_unit) in udfs {
914            let field = Field::new("arg", Utf8, true).into();
915
916            let args = ScalarFunctionArgs {
917                args: vec![ColumnarValue::Scalar(ScalarValue::Utf8(Some(
918                    "2020-09-08T13:42:29".to_string(),
919                )))],
920                arg_fields: vec![field],
921                number_rows: 1,
922                return_field: Field::new(
923                    "f",
924                    Timestamp(time_unit, Some("-05:00".into())),
925                    true,
926                )
927                .into(),
928                config_options: Arc::new(options.clone()),
929            };
930
931            let result = udf
932                .with_updated_config(&options.clone())
933                .unwrap()
934                .invoke_with_args(args)?;
935            let result = match time_unit {
936                Second => {
937                    let ColumnarValue::Scalar(ScalarValue::TimestampSecond(
938                        Some(value),
939                        tz,
940                    )) = result
941                    else {
942                        panic!("expected scalar timestamp");
943                    };
944
945                    assert_eq!(tz, time_zone);
946
947                    value
948                }
949                Millisecond => {
950                    let ColumnarValue::Scalar(ScalarValue::TimestampMillisecond(
951                        Some(value),
952                        tz,
953                    )) = result
954                    else {
955                        panic!("expected scalar timestamp");
956                    };
957
958                    assert_eq!(tz, time_zone);
959
960                    value
961                }
962                Microsecond => {
963                    let ColumnarValue::Scalar(ScalarValue::TimestampMicrosecond(
964                        Some(value),
965                        tz,
966                    )) = result
967                    else {
968                        panic!("expected scalar timestamp");
969                    };
970
971                    assert_eq!(tz, time_zone);
972
973                    value
974                }
975                Nanosecond => {
976                    let ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(
977                        Some(value),
978                        tz,
979                    )) = result
980                    else {
981                        panic!("expected scalar timestamp");
982                    };
983
984                    assert_eq!(tz, time_zone);
985
986                    value
987                }
988            };
989
990            let scale = match time_unit {
991                Second => 1_000_000_000,
992                Millisecond => 1_000_000,
993                Microsecond => 1_000,
994                Nanosecond => 1,
995            };
996
997            let offset = FixedOffset::west_opt(5 * 3600).unwrap();
998            let result = Some(
999                DateTime::<Utc>::from_timestamp_nanos(result * scale)
1000                    .with_timezone(&offset)
1001                    .to_string(),
1002            );
1003
1004            assert_eq!(result, Some("2020-09-08 13:42:29 -05:00".to_string()));
1005        }
1006
1007        Ok(())
1008    }
1009
1010    #[test]
1011    fn to_timestamp_formats_respects_execution_timezone() -> Result<()> {
1012        let udfs = udfs_and_timeunit();
1013
1014        let mut options = ConfigOptions::default();
1015        options.execution.time_zone = Some("-05:00".to_string());
1016
1017        let time_zone: Option<Arc<str>> = options
1018            .execution
1019            .time_zone
1020            .as_ref()
1021            .map(|tz| Arc::from(tz.as_str()));
1022
1023        let expr_field = Field::new("arg", Utf8, true).into();
1024        let format_field: Arc<Field> = Field::new("fmt", Utf8, true).into();
1025
1026        for (udf, time_unit) in udfs {
1027            for (value, format, expected_str) in [
1028                (
1029                    "2020-09-08 09:42:29 -05:00",
1030                    "%Y-%m-%d %H:%M:%S %z",
1031                    Some("2020-09-08 09:42:29 -05:00"),
1032                ),
1033                (
1034                    "2020-09-08T13:42:29Z",
1035                    "%+",
1036                    Some("2020-09-08 08:42:29 -05:00"),
1037                ),
1038                (
1039                    "2020-09-08 13:42:29 UTC",
1040                    "%Y-%m-%d %H:%M:%S %Z",
1041                    Some("2020-09-08 08:42:29 -05:00"),
1042                ),
1043                (
1044                    "+0000 2024-01-01 12:00:00",
1045                    "%z %Y-%m-%d %H:%M:%S",
1046                    Some("2024-01-01 07:00:00 -05:00"),
1047                ),
1048                (
1049                    "20200908134229+0100",
1050                    "%Y%m%d%H%M%S%z",
1051                    Some("2020-09-08 07:42:29 -05:00"),
1052                ),
1053                (
1054                    "2020-09-08+0230 13:42",
1055                    "%Y-%m-%d%z %H:%M",
1056                    Some("2020-09-08 06:12:00 -05:00"),
1057                ),
1058            ] {
1059                let args = ScalarFunctionArgs {
1060                    args: vec![
1061                        ColumnarValue::Scalar(ScalarValue::Utf8(Some(value.to_string()))),
1062                        ColumnarValue::Scalar(ScalarValue::Utf8(Some(
1063                            format.to_string(),
1064                        ))),
1065                    ],
1066                    arg_fields: vec![Arc::clone(&expr_field), Arc::clone(&format_field)],
1067                    number_rows: 1,
1068                    return_field: Field::new(
1069                        "f",
1070                        Timestamp(time_unit, Some("-05:00".into())),
1071                        true,
1072                    )
1073                    .into(),
1074                    config_options: Arc::new(options.clone()),
1075                };
1076                let result = udf
1077                    .with_updated_config(&options.clone())
1078                    .unwrap()
1079                    .invoke_with_args(args)?;
1080                let result = match time_unit {
1081                    Second => {
1082                        let ColumnarValue::Scalar(ScalarValue::TimestampSecond(
1083                            Some(value),
1084                            tz,
1085                        )) = result
1086                        else {
1087                            panic!("expected scalar timestamp");
1088                        };
1089
1090                        assert_eq!(tz, time_zone);
1091
1092                        value
1093                    }
1094                    Millisecond => {
1095                        let ColumnarValue::Scalar(ScalarValue::TimestampMillisecond(
1096                            Some(value),
1097                            tz,
1098                        )) = result
1099                        else {
1100                            panic!("expected scalar timestamp");
1101                        };
1102
1103                        assert_eq!(tz, time_zone);
1104
1105                        value
1106                    }
1107                    Microsecond => {
1108                        let ColumnarValue::Scalar(ScalarValue::TimestampMicrosecond(
1109                            Some(value),
1110                            tz,
1111                        )) = result
1112                        else {
1113                            panic!("expected scalar timestamp");
1114                        };
1115
1116                        assert_eq!(tz, time_zone);
1117
1118                        value
1119                    }
1120                    Nanosecond => {
1121                        let ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(
1122                            Some(value),
1123                            tz,
1124                        )) = result
1125                        else {
1126                            panic!("expected scalar timestamp");
1127                        };
1128
1129                        assert_eq!(tz, time_zone);
1130
1131                        value
1132                    }
1133                };
1134
1135                let scale = match time_unit {
1136                    Second => 1_000_000_000,
1137                    Millisecond => 1_000_000,
1138                    Microsecond => 1_000,
1139                    Nanosecond => 1,
1140                };
1141                let offset = FixedOffset::west_opt(5 * 3600).unwrap();
1142                let result = Some(
1143                    DateTime::<Utc>::from_timestamp_nanos(result * scale)
1144                        .with_timezone(&offset)
1145                        .to_string(),
1146                );
1147
1148                assert_eq!(result, expected_str.map(|s| s.to_string()));
1149            }
1150        }
1151
1152        Ok(())
1153    }
1154
1155    #[test]
1156    fn to_timestamp_invalid_execution_timezone_behavior() -> Result<()> {
1157        let field: Arc<Field> = Field::new("arg", Utf8, true).into();
1158        let return_field: Arc<Field> =
1159            Field::new("f", Timestamp(Nanosecond, None), true).into();
1160
1161        let mut options = ConfigOptions::default();
1162        options.execution.time_zone = Some("Invalid/Timezone".to_string());
1163
1164        let args = ScalarFunctionArgs {
1165            args: vec![ColumnarValue::Scalar(ScalarValue::Utf8(Some(
1166                "2020-09-08T13:42:29Z".to_string(),
1167            )))],
1168            arg_fields: vec![Arc::clone(&field)],
1169            number_rows: 1,
1170            return_field: Arc::clone(&return_field),
1171            config_options: Arc::new(options.clone()),
1172        };
1173
1174        let expected_err =
1175            "Invalid timezone \"Invalid/Timezone\": failed to parse timezone";
1176
1177        validate_expected_error(&mut options, args, expected_err);
1178
1179        Ok(())
1180    }
1181
1182    #[test]
1183    fn to_timestamp_formats_invalid_execution_timezone_behavior() -> Result<()> {
1184        let expr_field: Arc<Field> = Field::new("arg", Utf8, true).into();
1185        let format_field: Arc<Field> = Field::new("fmt", Utf8, true).into();
1186        let return_field: Arc<Field> =
1187            Field::new("f", Timestamp(Nanosecond, None), true).into();
1188
1189        let mut options = ConfigOptions::default();
1190        options.execution.time_zone = Some("Invalid/Timezone".to_string());
1191
1192        let expected_err =
1193            "Invalid timezone \"Invalid/Timezone\": failed to parse timezone";
1194
1195        let make_args = |value: &str, format: &str| ScalarFunctionArgs {
1196            args: vec![
1197                ColumnarValue::Scalar(ScalarValue::Utf8(Some(value.to_string()))),
1198                ColumnarValue::Scalar(ScalarValue::Utf8(Some(format.to_string()))),
1199            ],
1200            arg_fields: vec![Arc::clone(&expr_field), Arc::clone(&format_field)],
1201            number_rows: 1,
1202            return_field: Arc::clone(&return_field),
1203            config_options: Arc::new(options.clone()),
1204        };
1205
1206        for (value, format, _expected_str) in [
1207            (
1208                "2020-09-08 09:42:29 -05:00",
1209                "%Y-%m-%d %H:%M:%S %z",
1210                Some("2020-09-08 09:42:29 -05:00"),
1211            ),
1212            (
1213                "2020-09-08T13:42:29Z",
1214                "%+",
1215                Some("2020-09-08 08:42:29 -05:00"),
1216            ),
1217            (
1218                "2020-09-08 13:42:29 +0000",
1219                "%Y-%m-%d %H:%M:%S %z",
1220                Some("2020-09-08 08:42:29 -05:00"),
1221            ),
1222            (
1223                "+0000 2024-01-01 12:00:00",
1224                "%z %Y-%m-%d %H:%M:%S",
1225                Some("2024-01-01 07:00:00 -05:00"),
1226            ),
1227            (
1228                "20200908134229+0100",
1229                "%Y%m%d%H%M%S%z",
1230                Some("2020-09-08 07:42:29 -05:00"),
1231            ),
1232            (
1233                "2020-09-08+0230 13:42",
1234                "%Y-%m-%d%z %H:%M",
1235                Some("2020-09-08 06:12:00 -05:00"),
1236            ),
1237        ] {
1238            let args = make_args(value, format);
1239            validate_expected_error(&mut options.clone(), args, expected_err);
1240        }
1241
1242        let args = ScalarFunctionArgs {
1243            args: vec![
1244                ColumnarValue::Scalar(ScalarValue::Utf8(Some(
1245                    "2020-09-08T13:42:29".to_string(),
1246                ))),
1247                ColumnarValue::Scalar(ScalarValue::Utf8(Some(
1248                    "%Y-%m-%dT%H:%M:%S".to_string(),
1249                ))),
1250            ],
1251            arg_fields: vec![Arc::clone(&expr_field), Arc::clone(&format_field)],
1252            number_rows: 1,
1253            return_field: Arc::clone(&return_field),
1254            config_options: Arc::new(options.clone()),
1255        };
1256
1257        validate_expected_error(&mut options.clone(), args, expected_err);
1258
1259        Ok(())
1260    }
1261
1262    #[test]
1263    fn to_timestamp_invalid_input_type() -> Result<()> {
1264        // pass the wrong type of input array to to_timestamp and test
1265        // that we get an error.
1266
1267        let mut builder = Int64Array::builder(1);
1268        builder.append_value(1);
1269        let int64array = ColumnarValue::Array(Arc::new(builder.finish()));
1270
1271        let expected_err =
1272            "Execution error: Unsupported data type Int64 for function to_timestamp";
1273        match to_timestamp(&[int64array]) {
1274            Ok(_) => panic!("Expected error but got success"),
1275            Err(e) => {
1276                assert!(
1277                    e.to_string().contains(expected_err),
1278                    "Can not find expected error '{expected_err}'. Actual error '{e}'"
1279                );
1280            }
1281        }
1282        Ok(())
1283    }
1284
1285    #[test]
1286    fn to_timestamp_with_formats_invalid_input_type() -> Result<()> {
1287        // pass the wrong type of input array to to_timestamp and test
1288        // that we get an error.
1289
1290        let mut builder = Int64Array::builder(1);
1291        builder.append_value(1);
1292        let int64array = [
1293            ColumnarValue::Array(Arc::new(builder.finish())),
1294            ColumnarValue::Array(Arc::new(builder.finish())),
1295        ];
1296
1297        let expected_err =
1298            "Execution error: Unsupported data type Int64 for function to_timestamp";
1299        match to_timestamp(&int64array) {
1300            Ok(_) => panic!("Expected error but got success"),
1301            Err(e) => {
1302                assert!(
1303                    e.to_string().contains(expected_err),
1304                    "Can not find expected error '{expected_err}'. Actual error '{e}'"
1305                );
1306            }
1307        }
1308        Ok(())
1309    }
1310
1311    #[test]
1312    fn to_timestamp_with_unparsable_data() -> Result<()> {
1313        let mut date_string_builder = StringBuilder::with_capacity(2, 1024);
1314
1315        date_string_builder.append_null();
1316
1317        date_string_builder.append_value("2020-09-08 - 13:42:29.19085Z");
1318
1319        let string_array =
1320            ColumnarValue::Array(Arc::new(date_string_builder.finish()) as ArrayRef);
1321
1322        let expected_err = "Arrow error: Parser error: Error parsing timestamp from '2020-09-08 - 13:42:29.19085Z': error parsing time";
1323        match to_timestamp(&[string_array]) {
1324            Ok(_) => panic!("Expected error but got success"),
1325            Err(e) => {
1326                assert!(
1327                    e.to_string().contains(expected_err),
1328                    "Can not find expected error '{expected_err}'. Actual error '{e}'"
1329                );
1330            }
1331        }
1332        Ok(())
1333    }
1334
1335    #[test]
1336    fn to_timestamp_with_invalid_tz() -> Result<()> {
1337        let mut date_string_builder = StringBuilder::with_capacity(2, 1024);
1338
1339        date_string_builder.append_null();
1340
1341        date_string_builder.append_value("2020-09-08T13:42:29ZZ");
1342
1343        let string_array =
1344            ColumnarValue::Array(Arc::new(date_string_builder.finish()) as ArrayRef);
1345
1346        let expected_err = "Arrow error: Parser error: Invalid timezone \"ZZ\": failed to parse timezone";
1347        match to_timestamp(&[string_array]) {
1348            Ok(_) => panic!("Expected error but got success"),
1349            Err(e) => {
1350                assert!(
1351                    e.to_string().contains(expected_err),
1352                    "Can not find expected error '{expected_err}'. Actual error '{e}'"
1353                );
1354            }
1355        }
1356        Ok(())
1357    }
1358
1359    #[test]
1360    fn to_timestamp_with_no_matching_formats() -> Result<()> {
1361        let mut date_string_builder = StringBuilder::with_capacity(2, 1024);
1362        let mut format1_builder = StringBuilder::with_capacity(2, 1024);
1363        let mut format2_builder = StringBuilder::with_capacity(2, 1024);
1364        let mut format3_builder = StringBuilder::with_capacity(2, 1024);
1365
1366        date_string_builder.append_null();
1367        format1_builder.append_null();
1368        format2_builder.append_null();
1369        format3_builder.append_null();
1370
1371        date_string_builder.append_value("2020-09-08T13:42:29.19085Z");
1372        format1_builder.append_value("%s");
1373        format2_builder.append_value("%c");
1374        format3_builder.append_value("%H:%M:%S");
1375
1376        let string_array = [
1377            ColumnarValue::Array(Arc::new(date_string_builder.finish()) as ArrayRef),
1378            ColumnarValue::Array(Arc::new(format1_builder.finish()) as ArrayRef),
1379            ColumnarValue::Array(Arc::new(format2_builder.finish()) as ArrayRef),
1380            ColumnarValue::Array(Arc::new(format3_builder.finish()) as ArrayRef),
1381        ];
1382
1383        let expected_err = "Execution error: Error parsing timestamp from '2020-09-08T13:42:29.19085Z' using format '%H:%M:%S': input contains invalid characters";
1384        match to_timestamp(&string_array) {
1385            Ok(_) => panic!("Expected error but got success"),
1386            Err(e) => {
1387                assert!(
1388                    e.to_string().contains(expected_err),
1389                    "Can not find expected error '{expected_err}'. Actual error '{e}'"
1390                );
1391            }
1392        }
1393        Ok(())
1394    }
1395
1396    #[test]
1397    fn string_to_timestamp_formatted() {
1398        // Explicit timezone
1399        assert_eq!(
1400            1599572549190855000,
1401            parse_timestamp_formatted("2020-09-08T13:42:29.190855+00:00", "%+").unwrap()
1402        );
1403        assert_eq!(
1404            1599572549190855000,
1405            parse_timestamp_formatted("2020-09-08T13:42:29.190855Z", "%+").unwrap()
1406        );
1407        assert_eq!(
1408            1599572549000000000,
1409            parse_timestamp_formatted("2020-09-08T13:42:29Z", "%+").unwrap()
1410        ); // no fractional part
1411        assert_eq!(
1412            1599590549190855000,
1413            parse_timestamp_formatted("2020-09-08T13:42:29.190855-05:00", "%+").unwrap()
1414        );
1415        assert_eq!(
1416            1599590549000000000,
1417            parse_timestamp_formatted("1599590549", "%s").unwrap()
1418        );
1419        assert_eq!(
1420            1599572549000000000,
1421            parse_timestamp_formatted("09-08-2020 13/42/29", "%m-%d-%Y %H/%M/%S")
1422                .unwrap()
1423        );
1424        assert_eq!(
1425            1642896000000000000,
1426            parse_timestamp_formatted("2022-01-23", "%Y-%m-%d").unwrap()
1427        );
1428    }
1429
1430    fn parse_timestamp_formatted(s: &str, format: &str) -> Result<i64, DataFusionError> {
1431        let result = string_to_timestamp_nanos_formatted_with_timezone(
1432            &Some("UTC".parse()?),
1433            s,
1434            format,
1435        );
1436        if let Err(e) = &result {
1437            eprintln!("Error parsing timestamp '{s}' using format '{format}': {e:?}");
1438        }
1439        result
1440    }
1441
1442    #[test]
1443    fn string_to_timestamp_formatted_invalid() {
1444        // Test parsing invalid formats
1445        let cases = [
1446            ("", "%Y%m%d %H%M%S", "premature end of input"),
1447            ("SS", "%c", "premature end of input"),
1448            ("Wed, 18 Feb 2015 23:16:09 GMT", "", "trailing input"),
1449            (
1450                "Wed, 18 Feb 2015 23:16:09 GMT",
1451                "%XX",
1452                "input contains invalid characters",
1453            ),
1454            (
1455                "Wed, 18 Feb 2015 23:16:09 GMT",
1456                "%Y%m%d %H%M%S",
1457                "input contains invalid characters",
1458            ),
1459        ];
1460
1461        for (s, f, ctx) in cases {
1462            let expected = format!(
1463                "Execution error: Error parsing timestamp from '{s}' using format '{f}': {ctx}"
1464            );
1465            let actual = string_to_datetime_formatted(&Utc, s, f)
1466                .unwrap_err()
1467                .strip_backtrace();
1468            assert_eq!(actual, expected)
1469        }
1470    }
1471
1472    #[test]
1473    fn string_to_timestamp_invalid_arguments() {
1474        // Test parsing invalid formats
1475        let cases = [
1476            ("", "%Y%m%d %H%M%S", "premature end of input"),
1477            ("SS", "%c", "premature end of input"),
1478            ("Wed, 18 Feb 2015 23:16:09 GMT", "", "trailing input"),
1479            (
1480                "Wed, 18 Feb 2015 23:16:09 GMT",
1481                "%XX",
1482                "input contains invalid characters",
1483            ),
1484            (
1485                "Wed, 18 Feb 2015 23:16:09 GMT",
1486                "%Y%m%d %H%M%S",
1487                "input contains invalid characters",
1488            ),
1489        ];
1490
1491        for (s, f, ctx) in cases {
1492            let expected = format!(
1493                "Execution error: Error parsing timestamp from '{s}' using format '{f}': {ctx}"
1494            );
1495            let actual = string_to_datetime_formatted(&Utc, s, f)
1496                .unwrap_err()
1497                .strip_backtrace();
1498            assert_eq!(actual, expected)
1499        }
1500    }
1501
1502    #[test]
1503    fn test_no_tz() {
1504        let udfs: Vec<Box<dyn ScalarUDFImpl>> = vec![
1505            Box::new(ToTimestampFunc::new_with_config(&ConfigOptions::default())),
1506            Box::new(ToTimestampSecondsFunc::new_with_config(
1507                &ConfigOptions::default(),
1508            )),
1509            Box::new(ToTimestampMillisFunc::new_with_config(
1510                &ConfigOptions::default(),
1511            )),
1512            Box::new(ToTimestampNanosFunc::new_with_config(
1513                &ConfigOptions::default(),
1514            )),
1515            Box::new(ToTimestampSecondsFunc::new_with_config(
1516                &ConfigOptions::default(),
1517            )),
1518        ];
1519
1520        let mut nanos_builder = TimestampNanosecondArray::builder(2);
1521        let mut millis_builder = TimestampMillisecondArray::builder(2);
1522        let mut micros_builder = TimestampMicrosecondArray::builder(2);
1523        let mut sec_builder = TimestampSecondArray::builder(2);
1524
1525        nanos_builder.append_value(1599572549190850000);
1526        millis_builder.append_value(1599572549190);
1527        micros_builder.append_value(1599572549190850);
1528        sec_builder.append_value(1599572549);
1529
1530        let nanos_timestamps =
1531            Arc::new(nanos_builder.finish().with_timezone("UTC")) as ArrayRef;
1532        let millis_timestamps =
1533            Arc::new(millis_builder.finish().with_timezone("UTC")) as ArrayRef;
1534        let micros_timestamps =
1535            Arc::new(micros_builder.finish().with_timezone("UTC")) as ArrayRef;
1536        let sec_timestamps =
1537            Arc::new(sec_builder.finish().with_timezone("UTC")) as ArrayRef;
1538
1539        let arrays = &[
1540            ColumnarValue::Array(Arc::clone(&nanos_timestamps)),
1541            ColumnarValue::Array(Arc::clone(&millis_timestamps)),
1542            ColumnarValue::Array(Arc::clone(&micros_timestamps)),
1543            ColumnarValue::Array(Arc::clone(&sec_timestamps)),
1544        ];
1545
1546        for udf in &udfs {
1547            for array in arrays {
1548                let rt = udf.return_type(&[array.data_type()]).unwrap();
1549                let arg_field = Field::new("arg", array.data_type().clone(), true).into();
1550                assert!(matches!(rt, Timestamp(_, None)));
1551                let args = ScalarFunctionArgs {
1552                    args: vec![array.clone()],
1553                    arg_fields: vec![arg_field],
1554                    number_rows: 4,
1555                    return_field: Field::new("f", rt, true).into(),
1556                    config_options: Arc::new(ConfigOptions::default()),
1557                };
1558                let res = udf
1559                    .invoke_with_args(args)
1560                    .expect("that to_timestamp parsed values without error");
1561                let array = match res {
1562                    ColumnarValue::Array(res) => res,
1563                    _ => panic!("Expected a columnar array"),
1564                };
1565                let ty = array.data_type();
1566                assert!(matches!(ty, Timestamp(_, None)));
1567            }
1568        }
1569
1570        let mut nanos_builder = TimestampNanosecondArray::builder(2);
1571        let mut millis_builder = TimestampMillisecondArray::builder(2);
1572        let mut micros_builder = TimestampMicrosecondArray::builder(2);
1573        let mut sec_builder = TimestampSecondArray::builder(2);
1574        let mut i64_builder = Int64Array::builder(2);
1575
1576        nanos_builder.append_value(1599572549190850000);
1577        millis_builder.append_value(1599572549190);
1578        micros_builder.append_value(1599572549190850);
1579        sec_builder.append_value(1599572549);
1580        i64_builder.append_value(1599572549);
1581
1582        let nanos_timestamps = Arc::new(nanos_builder.finish()) as ArrayRef;
1583        let millis_timestamps = Arc::new(millis_builder.finish()) as ArrayRef;
1584        let micros_timestamps = Arc::new(micros_builder.finish()) as ArrayRef;
1585        let sec_timestamps = Arc::new(sec_builder.finish()) as ArrayRef;
1586        let i64_timestamps = Arc::new(i64_builder.finish()) as ArrayRef;
1587
1588        let arrays = &[
1589            ColumnarValue::Array(Arc::clone(&nanos_timestamps)),
1590            ColumnarValue::Array(Arc::clone(&millis_timestamps)),
1591            ColumnarValue::Array(Arc::clone(&micros_timestamps)),
1592            ColumnarValue::Array(Arc::clone(&sec_timestamps)),
1593            ColumnarValue::Array(Arc::clone(&i64_timestamps)),
1594        ];
1595
1596        for udf in &udfs {
1597            for array in arrays {
1598                let rt = udf.return_type(&[array.data_type()]).unwrap();
1599                assert!(matches!(rt, Timestamp(_, None)));
1600                let arg_field = Field::new("arg", array.data_type().clone(), true).into();
1601                let args = ScalarFunctionArgs {
1602                    args: vec![array.clone()],
1603                    arg_fields: vec![arg_field],
1604                    number_rows: 5,
1605                    return_field: Field::new("f", rt, true).into(),
1606                    config_options: Arc::new(ConfigOptions::default()),
1607                };
1608                let res = udf
1609                    .invoke_with_args(args)
1610                    .expect("that to_timestamp parsed values without error");
1611                let array = match res {
1612                    ColumnarValue::Array(res) => res,
1613                    _ => panic!("Expected a columnar array"),
1614                };
1615                let ty = array.data_type();
1616                assert!(matches!(ty, Timestamp(_, None)));
1617            }
1618        }
1619    }
1620
1621    #[test]
1622    fn test_to_timestamp_arg_validation() {
1623        let mut date_string_builder = StringBuilder::with_capacity(2, 1024);
1624        date_string_builder.append_value("2020-09-08T13:42:29.19085Z");
1625
1626        let data = date_string_builder.finish();
1627
1628        let funcs: Vec<(ScalarFunctionImplementation, TimeUnit)> = vec![
1629            (Arc::new(to_timestamp), Nanosecond),
1630            (Arc::new(to_timestamp_micros), Microsecond),
1631            (Arc::new(to_timestamp_millis), Millisecond),
1632            (Arc::new(to_timestamp_nanos), Nanosecond),
1633            (Arc::new(to_timestamp_seconds), Second),
1634        ];
1635
1636        let mut nanos_builder = TimestampNanosecondArray::builder(2);
1637        let mut millis_builder = TimestampMillisecondArray::builder(2);
1638        let mut micros_builder = TimestampMicrosecondArray::builder(2);
1639        let mut sec_builder = TimestampSecondArray::builder(2);
1640
1641        nanos_builder.append_value(1599572549190850000);
1642        millis_builder.append_value(1599572549190);
1643        micros_builder.append_value(1599572549190850);
1644        sec_builder.append_value(1599572549);
1645
1646        let nanos_expected_timestamps = &nanos_builder.finish() as &dyn Array;
1647        let millis_expected_timestamps = &millis_builder.finish() as &dyn Array;
1648        let micros_expected_timestamps = &micros_builder.finish() as &dyn Array;
1649        let sec_expected_timestamps = &sec_builder.finish() as &dyn Array;
1650
1651        for (func, time_unit) in funcs {
1652            // test UTF8
1653            let string_array = [
1654                ColumnarValue::Array(Arc::new(data.clone()) as ArrayRef),
1655                ColumnarValue::Scalar(ScalarValue::Utf8(Some("%s".to_string()))),
1656                ColumnarValue::Scalar(ScalarValue::Utf8(Some("%c".to_string()))),
1657                ColumnarValue::Scalar(ScalarValue::Utf8(Some("%+".to_string()))),
1658            ];
1659            let parsed_timestamps = func(&string_array)
1660                .expect("that to_timestamp with format args parsed values without error");
1661            if let ColumnarValue::Array(parsed_array) = parsed_timestamps {
1662                assert_eq!(parsed_array.len(), 1);
1663                match time_unit {
1664                    Nanosecond => {
1665                        assert_eq!(nanos_expected_timestamps, parsed_array.as_ref())
1666                    }
1667                    Millisecond => {
1668                        assert_eq!(millis_expected_timestamps, parsed_array.as_ref())
1669                    }
1670                    Microsecond => {
1671                        assert_eq!(micros_expected_timestamps, parsed_array.as_ref())
1672                    }
1673                    Second => {
1674                        assert_eq!(sec_expected_timestamps, parsed_array.as_ref())
1675                    }
1676                };
1677            } else {
1678                panic!("Expected a columnar array")
1679            }
1680
1681            // test LargeUTF8
1682            let string_array = [
1683                ColumnarValue::Array(Arc::new(data.clone()) as ArrayRef),
1684                ColumnarValue::Scalar(ScalarValue::LargeUtf8(Some("%s".to_string()))),
1685                ColumnarValue::Scalar(ScalarValue::LargeUtf8(Some("%c".to_string()))),
1686                ColumnarValue::Scalar(ScalarValue::LargeUtf8(Some("%+".to_string()))),
1687            ];
1688            let parsed_timestamps = func(&string_array)
1689                .expect("that to_timestamp with format args parsed values without error");
1690            if let ColumnarValue::Array(parsed_array) = parsed_timestamps {
1691                assert_eq!(parsed_array.len(), 1);
1692                assert!(matches!(parsed_array.data_type(), Timestamp(_, None)));
1693
1694                match time_unit {
1695                    Nanosecond => {
1696                        assert_eq!(nanos_expected_timestamps, parsed_array.as_ref())
1697                    }
1698                    Millisecond => {
1699                        assert_eq!(millis_expected_timestamps, parsed_array.as_ref())
1700                    }
1701                    Microsecond => {
1702                        assert_eq!(micros_expected_timestamps, parsed_array.as_ref())
1703                    }
1704                    Second => {
1705                        assert_eq!(sec_expected_timestamps, parsed_array.as_ref())
1706                    }
1707                };
1708            } else {
1709                panic!("Expected a columnar array")
1710            }
1711
1712            // test other types
1713            let string_array = [
1714                ColumnarValue::Array(Arc::new(data.clone()) as ArrayRef),
1715                ColumnarValue::Scalar(ScalarValue::Int32(Some(1))),
1716                ColumnarValue::Scalar(ScalarValue::Int32(Some(2))),
1717                ColumnarValue::Scalar(ScalarValue::Int32(Some(3))),
1718            ];
1719
1720            let expected = "Unsupported data type Int32 for function".to_string();
1721            let actual = func(&string_array).unwrap_err().to_string();
1722            assert_contains!(actual, expected);
1723
1724            // test other types
1725            let string_array = [
1726                ColumnarValue::Array(Arc::new(data.clone()) as ArrayRef),
1727                ColumnarValue::Array(Arc::new(PrimitiveArray::<Int64Type>::new(
1728                    vec![1i64].into(),
1729                    None,
1730                )) as ArrayRef),
1731            ];
1732
1733            let expected = "Unsupported data type".to_string();
1734            let actual = func(&string_array).unwrap_err().to_string();
1735            assert_contains!(actual, expected);
1736        }
1737    }
1738}