Skip to main content

datafusion_functions/datetime/
to_timestamp.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use std::sync::Arc;
19
20use crate::datetime::common::*;
21use arrow::array::timezone::Tz;
22use arrow::array::{
23    Array, Decimal128Array, Float16Array, Float32Array, Float64Array,
24    TimestampNanosecondArray,
25};
26use arrow::datatypes::DataType::*;
27use arrow::datatypes::TimeUnit::{Microsecond, Millisecond, Nanosecond, Second};
28use arrow::datatypes::{
29    ArrowTimestampType, DataType, TimestampMicrosecondType, TimestampMillisecondType,
30    TimestampNanosecondType, TimestampSecondType,
31};
32use datafusion_common::config::ConfigOptions;
33use datafusion_common::{Result, ScalarType, ScalarValue, exec_err};
34use datafusion_expr::{
35    ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl,
36    Signature, Volatility,
37};
38use datafusion_macros::user_doc;
39
40#[user_doc(
41    doc_section(label = "Time and Date Functions"),
42    description = r#"
43Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000000<TZ>`) in the session time zone. Supports strings,
44integer, unsigned integer, and double types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00')
45if no [Chrono formats](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) are provided. 
46Strings that parse without a time zone are treated as if they are in the
47session time zone, or UTC if no session time zone is set.
48Integers, unsigned integers, and doubles are interpreted as seconds since the unix epoch (`1970-01-01T00:00:00Z`).
49
50Note: `to_timestamp` returns `Timestamp(ns, TimeZone)` where the time zone is the session time zone. The supported range
51for integer input is between`-9223372037` and `9223372036`. Supported range for string input is between
52`1677-09-21T00:12:44.0` and `2262-04-11T23:47:16.0`. Please use `to_timestamp_seconds`
53for the input outside of supported bounds.
54
55The session time zone can be set using the statement `SET TIMEZONE = 'desired time zone'`.
56The time zone can be a value like +00:00, 'Europe/London' etc.
57"#,
58    syntax_example = "to_timestamp(expression[, ..., format_n])",
59    sql_example = r#"```sql
60> select to_timestamp('2023-01-31T09:26:56.123456789-05:00');
61+-----------------------------------------------------------+
62| to_timestamp(Utf8("2023-01-31T09:26:56.123456789-05:00")) |
63+-----------------------------------------------------------+
64| 2023-01-31T14:26:56.123456789                             |
65+-----------------------------------------------------------+
66> select to_timestamp('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y');
67+--------------------------------------------------------------------------------------------------------+
68| to_timestamp(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) |
69+--------------------------------------------------------------------------------------------------------+
70| 2023-05-17T03:59:00.123456789                                                                          |
71+--------------------------------------------------------------------------------------------------------+
72```
73Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/builtin_functions/date_time.rs)
74"#,
75    argument(
76        name = "expression",
77        description = "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators."
78    ),
79    argument(
80        name = "format_n",
81        description = r#"
82Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression.
83Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully
84parse the expression an error will be returned. Note: parsing of named timezones (e.g. 'America/New_York') using %Z is
85only supported at the end of the string preceded by a space.
86"#
87    )
88)]
89#[derive(Debug, PartialEq, Eq, Hash)]
90pub struct ToTimestampFunc {
91    signature: Signature,
92    timezone: Option<Arc<str>>,
93}
94
95#[user_doc(
96    doc_section(label = "Time and Date Functions"),
97    description = r#"
98Converts a value to a timestamp (`YYYY-MM-DDT00:00:00<TZ>`) in the session time zone. Supports strings,
99integer, unsigned integer, and double types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00')
100if no [Chrono formats](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) are provided. 
101Strings that parse without a time zone are treated as if they are in the
102session time zone, or UTC if no session time zone is set.
103Integers, unsigned integers, and doubles are interpreted as seconds since the unix epoch (`1970-01-01T00:00:00Z`).
104
105The session time zone can be set using the statement `SET TIMEZONE = 'desired time zone'`.
106The time zone can be a value like +00:00, 'Europe/London' etc.
107"#,
108    syntax_example = "to_timestamp_seconds(expression[, ..., format_n])",
109    sql_example = r#"```sql
110> select to_timestamp_seconds('2023-01-31T09:26:56.123456789-05:00');
111+-------------------------------------------------------------------+
112| to_timestamp_seconds(Utf8("2023-01-31T09:26:56.123456789-05:00")) |
113+-------------------------------------------------------------------+
114| 2023-01-31T14:26:56                                               |
115+-------------------------------------------------------------------+
116> select to_timestamp_seconds('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y');
117+----------------------------------------------------------------------------------------------------------------+
118| to_timestamp_seconds(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) |
119+----------------------------------------------------------------------------------------------------------------+
120| 2023-05-17T03:59:00                                                                                            |
121+----------------------------------------------------------------------------------------------------------------+
122```
123Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/builtin_functions/date_time.rs)
124"#,
125    argument(
126        name = "expression",
127        description = "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators."
128    ),
129    argument(
130        name = "format_n",
131        description = r#"
132Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression.
133Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully
134parse the expression an error will be returned. Note: parsing of named timezones (e.g. 'America/New_York') using %Z is
135only supported at the end of the string preceded by a space.
136"#
137    )
138)]
139#[derive(Debug, PartialEq, Eq, Hash)]
140pub struct ToTimestampSecondsFunc {
141    signature: Signature,
142    timezone: Option<Arc<str>>,
143}
144
145#[user_doc(
146    doc_section(label = "Time and Date Functions"),
147    description = r#"
148Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000<TZ>`) in the session time zone. Supports strings,
149integer, unsigned integer, and double types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00')
150if no [Chrono formats](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) are provided. 
151Strings that parse without a time zone are treated as if they are in the
152session time zone, or UTC if no session time zone is set.
153Integers, unsigned integers, and doubles are interpreted as milliseconds since the unix epoch (`1970-01-01T00:00:00Z`).
154
155The session time zone can be set using the statement `SET TIMEZONE = 'desired time zone'`.
156The time zone can be a value like +00:00, 'Europe/London' etc.
157"#,
158    syntax_example = "to_timestamp_millis(expression[, ..., format_n])",
159    sql_example = r#"```sql
160> select to_timestamp_millis('2023-01-31T09:26:56.123456789-05:00');
161+------------------------------------------------------------------+
162| to_timestamp_millis(Utf8("2023-01-31T09:26:56.123456789-05:00")) |
163+------------------------------------------------------------------+
164| 2023-01-31T14:26:56.123                                          |
165+------------------------------------------------------------------+
166> select to_timestamp_millis('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y');
167+---------------------------------------------------------------------------------------------------------------+
168| to_timestamp_millis(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) |
169+---------------------------------------------------------------------------------------------------------------+
170| 2023-05-17T03:59:00.123                                                                                       |
171+---------------------------------------------------------------------------------------------------------------+
172```
173Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/builtin_functions/date_time.rs)
174"#,
175    argument(
176        name = "expression",
177        description = "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators."
178    ),
179    argument(
180        name = "format_n",
181        description = r#"
182Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression.
183Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully
184parse the expression an error will be returned. Note: parsing of named timezones (e.g. 'America/New_York') using %Z is
185only supported at the end of the string preceded by a space.
186"#
187    )
188)]
189#[derive(Debug, PartialEq, Eq, Hash)]
190pub struct ToTimestampMillisFunc {
191    signature: Signature,
192    timezone: Option<Arc<str>>,
193}
194
195#[user_doc(
196    doc_section(label = "Time and Date Functions"),
197    description = r#"
198Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000000<TZ>`) in the session time zone. Supports strings,
199integer, unsigned integer, and double types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00')
200if no [Chrono formats](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) are provided. 
201Strings that parse without a time zone are treated as if they are in the
202session time zone, or UTC if no session time zone is set.
203Integers, unsigned integers, and doubles are interpreted as microseconds since the unix epoch (`1970-01-01T00:00:00Z`).
204
205The session time zone can be set using the statement `SET TIMEZONE = 'desired time zone'`.
206The time zone can be a value like +00:00, 'Europe/London' etc.
207"#,
208    syntax_example = "to_timestamp_micros(expression[, ..., format_n])",
209    sql_example = r#"```sql
210> select to_timestamp_micros('2023-01-31T09:26:56.123456789-05:00');
211+------------------------------------------------------------------+
212| to_timestamp_micros(Utf8("2023-01-31T09:26:56.123456789-05:00")) |
213+------------------------------------------------------------------+
214| 2023-01-31T14:26:56.123456                                       |
215+------------------------------------------------------------------+
216> select to_timestamp_micros('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y');
217+---------------------------------------------------------------------------------------------------------------+
218| to_timestamp_micros(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) |
219+---------------------------------------------------------------------------------------------------------------+
220| 2023-05-17T03:59:00.123456                                                                                    |
221+---------------------------------------------------------------------------------------------------------------+
222```
223Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/builtin_functions/date_time.rs)
224"#,
225    argument(
226        name = "expression",
227        description = "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators."
228    ),
229    argument(
230        name = "format_n",
231        description = r#"
232Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression.
233Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully
234parse the expression an error will be returned. Note: parsing of named timezones (e.g. 'America/New_York') using %Z is
235only supported at the end of the string preceded by a space.
236"#
237    )
238)]
239#[derive(Debug, PartialEq, Eq, Hash)]
240pub struct ToTimestampMicrosFunc {
241    signature: Signature,
242    timezone: Option<Arc<str>>,
243}
244
245#[user_doc(
246    doc_section(label = "Time and Date Functions"),
247    description = r#"
248Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000000000<TZ>`) in the session time zone. Supports strings,
249integer, unsigned integer, and double types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00')
250if no [Chrono formats](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) are provided. 
251Strings that parse without a time zone are treated as if they are in the
252session time zone. Integers, unsigned integers, and doubles are interpreted as nanoseconds since the unix epoch (`1970-01-01T00:00:00Z`).
253
254The session time zone can be set using the statement `SET TIMEZONE = 'desired time zone'`.
255The time zone can be a value like +00:00, 'Europe/London' etc.
256"#,
257    syntax_example = "to_timestamp_nanos(expression[, ..., format_n])",
258    sql_example = r#"```sql
259> select to_timestamp_nanos('2023-01-31T09:26:56.123456789-05:00');
260+-----------------------------------------------------------------+
261| to_timestamp_nanos(Utf8("2023-01-31T09:26:56.123456789-05:00")) |
262+-----------------------------------------------------------------+
263| 2023-01-31T14:26:56.123456789                                   |
264+-----------------------------------------------------------------+
265> select to_timestamp_nanos('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y');
266+--------------------------------------------------------------------------------------------------------------+
267| to_timestamp_nanos(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) |
268+--------------------------------------------------------------------------------------------------------------+
269| 2023-05-17T03:59:00.123456789                                                                                |
270+---------------------------------------------------------------------------------------------------------------+
271```
272Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/builtin_functions/date_time.rs)
273"#,
274    argument(
275        name = "expression",
276        description = "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators."
277    ),
278    argument(
279        name = "format_n",
280        description = r#"
281Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression.
282Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully
283parse the expression an error will be returned. Note: parsing of named timezones (e.g. 'America/New_York') using %Z is
284only supported at the end of the string preceded by a space.
285"#
286    )
287)]
288#[derive(Debug, PartialEq, Eq, Hash)]
289pub struct ToTimestampNanosFunc {
290    signature: Signature,
291    timezone: Option<Arc<str>>,
292}
293
294/// Macro to generate boilerplate constructors and config methods for ToTimestamp* functions.
295/// Generates: Default impl, deprecated new(), new_with_config(), and extracts timezone from ConfigOptions.
296macro_rules! impl_to_timestamp_constructors {
297    ($func:ty) => {
298        impl Default for $func {
299            fn default() -> Self {
300                Self::new_with_config(&ConfigOptions::default())
301            }
302        }
303
304        impl $func {
305            #[deprecated(since = "52.0.0", note = "use `new_with_config` instead")]
306            /// Deprecated constructor retained for backwards compatibility.
307            ///
308            /// Prefer `new_with_config` which allows specifying the
309            /// timezone via [`ConfigOptions`]. This helper now mirrors the
310            /// canonical default offset (None) provided by `ConfigOptions::default()`.
311            pub fn new() -> Self {
312                Self::new_with_config(&ConfigOptions::default())
313            }
314
315            pub fn new_with_config(config: &ConfigOptions) -> Self {
316                Self {
317                    signature: Signature::variadic_any(Volatility::Immutable),
318                    timezone: config
319                        .execution
320                        .time_zone
321                        .as_ref()
322                        .map(|tz| Arc::from(tz.as_str())),
323                }
324            }
325        }
326    };
327}
328
329impl_to_timestamp_constructors!(ToTimestampFunc);
330impl_to_timestamp_constructors!(ToTimestampSecondsFunc);
331impl_to_timestamp_constructors!(ToTimestampMillisFunc);
332impl_to_timestamp_constructors!(ToTimestampMicrosFunc);
333impl_to_timestamp_constructors!(ToTimestampNanosFunc);
334
335fn decimal_to_nanoseconds(value: i128, scale: i8) -> i64 {
336    let nanos_exponent = 9_i16 - scale as i16;
337    let timestamp_nanos = if nanos_exponent >= 0 {
338        value * 10_i128.pow(nanos_exponent as u32)
339    } else {
340        value / 10_i128.pow(nanos_exponent.unsigned_abs() as u32)
341    };
342    timestamp_nanos as i64
343}
344
345fn decimal128_to_timestamp_nanos(
346    arg: &ColumnarValue,
347    tz: Option<Arc<str>>,
348) -> Result<ColumnarValue> {
349    match arg {
350        ColumnarValue::Scalar(ScalarValue::Decimal128(Some(value), _, scale)) => {
351            let timestamp_nanos = decimal_to_nanoseconds(*value, *scale);
352            Ok(ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(
353                Some(timestamp_nanos),
354                tz,
355            )))
356        }
357        ColumnarValue::Scalar(ScalarValue::Decimal128(None, _, _)) => Ok(
358            ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(None, tz)),
359        ),
360        ColumnarValue::Array(arr) => {
361            let decimal_arr = downcast_arg!(arr, Decimal128Array);
362            let scale = decimal_arr.scale();
363            let result: TimestampNanosecondArray = decimal_arr
364                .iter()
365                .map(|v| v.map(|val| decimal_to_nanoseconds(val, scale)))
366                .collect();
367            let result = result.with_timezone_opt(tz);
368            Ok(ColumnarValue::Array(Arc::new(result)))
369        }
370        _ => exec_err!("Invalid Decimal128 value for to_timestamp"),
371    }
372}
373
374/// to_timestamp SQL function
375///
376/// Note: `to_timestamp` returns `Timestamp(Nanosecond)` though its arguments are interpreted as **seconds**.
377/// The supported range for integer input is between `-9223372037` and `9223372036`.
378/// Supported range for string input is between `1677-09-21T00:12:44.0` and `2262-04-11T23:47:16.0`.
379/// Please use `to_timestamp_seconds` for the input outside of supported bounds.
380/// Macro to generate the with_updated_config method for ToTimestamp* functions.
381macro_rules! impl_with_updated_config {
382    () => {
383        fn with_updated_config(&self, config: &ConfigOptions) -> Option<ScalarUDF> {
384            Some(Self::new_with_config(config).into())
385        }
386    };
387}
388
389impl ScalarUDFImpl for ToTimestampFunc {
390    fn name(&self) -> &str {
391        "to_timestamp"
392    }
393
394    fn signature(&self) -> &Signature {
395        &self.signature
396    }
397
398    fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
399        Ok(Timestamp(Nanosecond, self.timezone.clone()))
400    }
401
402    impl_with_updated_config!();
403
404    fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
405        let ScalarFunctionArgs { args, .. } = args;
406
407        if args.is_empty() {
408            return exec_err!(
409                "to_timestamp function requires 1 or more arguments, got {}",
410                args.len()
411            );
412        }
413
414        // validate that any args after the first one are Utf8
415        if args.len() > 1 {
416            validate_data_types(&args, "to_timestamp")?;
417        }
418
419        let tz = self.timezone.clone();
420
421        match args[0].data_type() {
422            Int8 | Int16 | Int32 | Int64 | UInt8 | UInt16 | UInt32 | UInt64 => args[0]
423                .cast_to(&Timestamp(Second, None), None)?
424                .cast_to(&Timestamp(Nanosecond, tz), None),
425            Null | Timestamp(_, _) => args[0].cast_to(&Timestamp(Nanosecond, tz), None),
426            Float16 => match &args[0] {
427                ColumnarValue::Scalar(ScalarValue::Float16(value)) => {
428                    let timestamp_nanos =
429                        value.map(|v| (v.to_f64() * 1_000_000_000.0) as i64);
430                    Ok(ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(
431                        timestamp_nanos,
432                        tz,
433                    )))
434                }
435                ColumnarValue::Array(arr) => {
436                    let f16_arr = downcast_arg!(arr, Float16Array);
437                    let result: TimestampNanosecondArray =
438                        f16_arr.unary(|x| (x.to_f64() * 1_000_000_000.0) as i64);
439                    Ok(ColumnarValue::Array(Arc::new(result.with_timezone_opt(tz))))
440                }
441                _ => exec_err!("Invalid Float16 value for to_timestamp"),
442            },
443            Float32 => match &args[0] {
444                ColumnarValue::Scalar(ScalarValue::Float32(value)) => {
445                    let timestamp_nanos =
446                        value.map(|v| (v as f64 * 1_000_000_000.0) as i64);
447                    Ok(ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(
448                        timestamp_nanos,
449                        tz,
450                    )))
451                }
452                ColumnarValue::Array(arr) => {
453                    let f32_arr = downcast_arg!(arr, Float32Array);
454                    let result: TimestampNanosecondArray =
455                        f32_arr.unary(|x| (x as f64 * 1_000_000_000.0) as i64);
456                    Ok(ColumnarValue::Array(Arc::new(result.with_timezone_opt(tz))))
457                }
458                _ => exec_err!("Invalid Float32 value for to_timestamp"),
459            },
460            Float64 => match &args[0] {
461                ColumnarValue::Scalar(ScalarValue::Float64(value)) => {
462                    let timestamp_nanos = value.map(|v| (v * 1_000_000_000.0) as i64);
463                    Ok(ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(
464                        timestamp_nanos,
465                        tz,
466                    )))
467                }
468                ColumnarValue::Array(arr) => {
469                    let f64_arr = downcast_arg!(arr, Float64Array);
470                    let result: TimestampNanosecondArray =
471                        f64_arr.unary(|x| (x * 1_000_000_000.0) as i64);
472                    Ok(ColumnarValue::Array(Arc::new(result.with_timezone_opt(tz))))
473                }
474                _ => exec_err!("Invalid Float64 value for to_timestamp"),
475            },
476            Decimal32(_, _) | Decimal64(_, _) | Decimal256(_, _) => {
477                let arg = args[0].cast_to(&Decimal128(38, 9), None)?;
478                decimal128_to_timestamp_nanos(&arg, tz)
479            }
480            Decimal128(_, _) => decimal128_to_timestamp_nanos(&args[0], tz),
481            Utf8View | LargeUtf8 | Utf8 => {
482                to_timestamp_impl::<TimestampNanosecondType>(&args, "to_timestamp", &tz)
483            }
484            other => {
485                exec_err!("Unsupported data type {other} for function to_timestamp")
486            }
487        }
488    }
489
490    fn documentation(&self) -> Option<&Documentation> {
491        self.doc()
492    }
493}
494
495impl ScalarUDFImpl for ToTimestampSecondsFunc {
496    fn name(&self) -> &str {
497        "to_timestamp_seconds"
498    }
499
500    fn signature(&self) -> &Signature {
501        &self.signature
502    }
503
504    fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
505        Ok(Timestamp(Second, self.timezone.clone()))
506    }
507
508    impl_with_updated_config!();
509
510    fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
511        let ScalarFunctionArgs { args, .. } = args;
512
513        if args.is_empty() {
514            return exec_err!(
515                "to_timestamp_seconds function requires 1 or more arguments, got {}",
516                args.len()
517            );
518        }
519
520        // validate that any args after the first one are Utf8
521        if args.len() > 1 {
522            validate_data_types(&args, "to_timestamp")?;
523        }
524
525        let tz = self.timezone.clone();
526
527        match args[0].data_type() {
528            Null
529            | Int8
530            | Int16
531            | Int32
532            | Int64
533            | UInt8
534            | UInt16
535            | UInt32
536            | UInt64
537            | Timestamp(_, _)
538            | Decimal32(_, _)
539            | Decimal64(_, _)
540            | Decimal128(_, _)
541            | Decimal256(_, _) => args[0].cast_to(&Timestamp(Second, tz), None),
542            Float16 | Float32 | Float64 => args[0]
543                .cast_to(&Int64, None)?
544                .cast_to(&Timestamp(Second, tz), None),
545            Utf8View | LargeUtf8 | Utf8 => to_timestamp_impl::<TimestampSecondType>(
546                &args,
547                "to_timestamp_seconds",
548                &self.timezone,
549            ),
550            other => {
551                exec_err!(
552                    "Unsupported data type {} for function to_timestamp_seconds",
553                    other
554                )
555            }
556        }
557    }
558
559    fn documentation(&self) -> Option<&Documentation> {
560        self.doc()
561    }
562}
563
564impl ScalarUDFImpl for ToTimestampMillisFunc {
565    fn name(&self) -> &str {
566        "to_timestamp_millis"
567    }
568
569    fn signature(&self) -> &Signature {
570        &self.signature
571    }
572
573    fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
574        Ok(Timestamp(Millisecond, self.timezone.clone()))
575    }
576
577    impl_with_updated_config!();
578
579    fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
580        let ScalarFunctionArgs { args, .. } = args;
581
582        if args.is_empty() {
583            return exec_err!(
584                "to_timestamp_millis function requires 1 or more arguments, got {}",
585                args.len()
586            );
587        }
588
589        // validate that any args after the first one are Utf8
590        if args.len() > 1 {
591            validate_data_types(&args, "to_timestamp")?;
592        }
593
594        match args[0].data_type() {
595            Null
596            | Int8
597            | Int16
598            | Int32
599            | Int64
600            | UInt8
601            | UInt16
602            | UInt32
603            | UInt64
604            | Timestamp(_, _)
605            | Decimal32(_, _)
606            | Decimal64(_, _)
607            | Decimal128(_, _)
608            | Decimal256(_, _) => {
609                args[0].cast_to(&Timestamp(Millisecond, self.timezone.clone()), None)
610            }
611            Float16 | Float32 | Float64 => args[0]
612                .cast_to(&Int64, None)?
613                .cast_to(&Timestamp(Millisecond, self.timezone.clone()), None),
614            Utf8View | LargeUtf8 | Utf8 => to_timestamp_impl::<TimestampMillisecondType>(
615                &args,
616                "to_timestamp_millis",
617                &self.timezone,
618            ),
619            other => {
620                exec_err!(
621                    "Unsupported data type {} for function to_timestamp_millis",
622                    other
623                )
624            }
625        }
626    }
627
628    fn documentation(&self) -> Option<&Documentation> {
629        self.doc()
630    }
631}
632
633impl ScalarUDFImpl for ToTimestampMicrosFunc {
634    fn name(&self) -> &str {
635        "to_timestamp_micros"
636    }
637
638    fn signature(&self) -> &Signature {
639        &self.signature
640    }
641
642    fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
643        Ok(Timestamp(Microsecond, self.timezone.clone()))
644    }
645
646    impl_with_updated_config!();
647
648    fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
649        let ScalarFunctionArgs { args, .. } = args;
650
651        if args.is_empty() {
652            return exec_err!(
653                "to_timestamp_micros function requires 1 or more arguments, got {}",
654                args.len()
655            );
656        }
657
658        // validate that any args after the first one are Utf8
659        if args.len() > 1 {
660            validate_data_types(&args, "to_timestamp")?;
661        }
662
663        match args[0].data_type() {
664            Null
665            | Int8
666            | Int16
667            | Int32
668            | Int64
669            | UInt8
670            | UInt16
671            | UInt32
672            | UInt64
673            | Timestamp(_, _)
674            | Decimal32(_, _)
675            | Decimal64(_, _)
676            | Decimal128(_, _)
677            | Decimal256(_, _) => {
678                args[0].cast_to(&Timestamp(Microsecond, self.timezone.clone()), None)
679            }
680            Float16 | Float32 | Float64 => args[0]
681                .cast_to(&Int64, None)?
682                .cast_to(&Timestamp(Microsecond, self.timezone.clone()), None),
683            Utf8View | LargeUtf8 | Utf8 => to_timestamp_impl::<TimestampMicrosecondType>(
684                &args,
685                "to_timestamp_micros",
686                &self.timezone,
687            ),
688            other => {
689                exec_err!(
690                    "Unsupported data type {} for function to_timestamp_micros",
691                    other
692                )
693            }
694        }
695    }
696
697    fn documentation(&self) -> Option<&Documentation> {
698        self.doc()
699    }
700}
701
702impl ScalarUDFImpl for ToTimestampNanosFunc {
703    fn name(&self) -> &str {
704        "to_timestamp_nanos"
705    }
706
707    fn signature(&self) -> &Signature {
708        &self.signature
709    }
710
711    fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
712        Ok(Timestamp(Nanosecond, self.timezone.clone()))
713    }
714
715    impl_with_updated_config!();
716
717    fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
718        let ScalarFunctionArgs { args, .. } = args;
719
720        if args.is_empty() {
721            return exec_err!(
722                "to_timestamp_nanos function requires 1 or more arguments, got {}",
723                args.len()
724            );
725        }
726
727        // validate that any args after the first one are Utf8
728        if args.len() > 1 {
729            validate_data_types(&args, "to_timestamp")?;
730        }
731
732        match args[0].data_type() {
733            Null
734            | Int8
735            | Int16
736            | Int32
737            | Int64
738            | UInt8
739            | UInt16
740            | UInt32
741            | UInt64
742            | Timestamp(_, _)
743            | Decimal32(_, _)
744            | Decimal64(_, _)
745            | Decimal128(_, _)
746            | Decimal256(_, _) => {
747                args[0].cast_to(&Timestamp(Nanosecond, self.timezone.clone()), None)
748            }
749            Float16 | Float32 | Float64 => args[0]
750                .cast_to(&Int64, None)?
751                .cast_to(&Timestamp(Nanosecond, self.timezone.clone()), None),
752            Utf8View | LargeUtf8 | Utf8 => to_timestamp_impl::<TimestampNanosecondType>(
753                &args,
754                "to_timestamp_nanos",
755                &self.timezone,
756            ),
757            other => {
758                exec_err!(
759                    "Unsupported data type {} for function to_timestamp_nanos",
760                    other
761                )
762            }
763        }
764    }
765
766    fn documentation(&self) -> Option<&Documentation> {
767        self.doc()
768    }
769}
770
771fn to_timestamp_impl<T: ArrowTimestampType + ScalarType<i64>>(
772    args: &[ColumnarValue],
773    name: &str,
774    timezone: &Option<Arc<str>>,
775) -> Result<ColumnarValue> {
776    let factor = match T::UNIT {
777        Second => 1_000_000_000,
778        Millisecond => 1_000_000,
779        Microsecond => 1_000,
780        Nanosecond => 1,
781    };
782
783    let tz = match timezone.clone() {
784        Some(tz) => Some(tz.parse::<Tz>()?),
785        None => None,
786    };
787
788    match args.len() {
789        1 => handle::<T, _>(
790            args,
791            move |s| string_to_timestamp_nanos_with_timezone(&tz, s).map(|n| n / factor),
792            name,
793            &Timestamp(T::UNIT, timezone.clone()),
794        ),
795        n if n >= 2 => handle_multiple::<T, _, _>(
796            args,
797            move |s, format| {
798                string_to_timestamp_nanos_formatted_with_timezone(&tz, s, format)
799            },
800            |n| n / factor,
801            name,
802            &Timestamp(T::UNIT, timezone.clone()),
803        ),
804        _ => exec_err!("Unsupported 0 argument count for function {name}"),
805    }
806}
807
808#[cfg(test)]
809mod tests {
810
811    use arrow::array::types::Int64Type;
812    use arrow::array::{
813        Array, PrimitiveArray, TimestampMicrosecondArray, TimestampMillisecondArray,
814        TimestampNanosecondArray, TimestampSecondArray,
815    };
816    use arrow::array::{ArrayRef, Int64Array, StringBuilder};
817    use arrow::datatypes::{Field, TimeUnit};
818    use chrono::{DateTime, FixedOffset, Utc};
819    use datafusion_common::{DataFusionError, assert_contains};
820    use datafusion_expr::ScalarFunctionImplementation;
821
822    use super::*;
823
824    fn to_timestamp(args: &[ColumnarValue]) -> Result<ColumnarValue> {
825        let timezone: Option<Arc<str>> = Some("UTC".into());
826        to_timestamp_impl::<TimestampNanosecondType>(args, "to_timestamp", &timezone)
827    }
828
829    /// to_timestamp_millis SQL function
830    fn to_timestamp_millis(args: &[ColumnarValue]) -> Result<ColumnarValue> {
831        let timezone: Option<Arc<str>> = Some("UTC".into());
832        to_timestamp_impl::<TimestampMillisecondType>(
833            args,
834            "to_timestamp_millis",
835            &timezone,
836        )
837    }
838
839    /// to_timestamp_micros SQL function
840    fn to_timestamp_micros(args: &[ColumnarValue]) -> Result<ColumnarValue> {
841        let timezone: Option<Arc<str>> = Some("UTC".into());
842        to_timestamp_impl::<TimestampMicrosecondType>(
843            args,
844            "to_timestamp_micros",
845            &timezone,
846        )
847    }
848
849    /// to_timestamp_nanos SQL function
850    fn to_timestamp_nanos(args: &[ColumnarValue]) -> Result<ColumnarValue> {
851        let timezone: Option<Arc<str>> = Some("UTC".into());
852        to_timestamp_impl::<TimestampNanosecondType>(
853            args,
854            "to_timestamp_nanos",
855            &timezone,
856        )
857    }
858
859    /// to_timestamp_seconds SQL function
860    fn to_timestamp_seconds(args: &[ColumnarValue]) -> Result<ColumnarValue> {
861        let timezone: Option<Arc<str>> = Some("UTC".into());
862        to_timestamp_impl::<TimestampSecondType>(args, "to_timestamp_seconds", &timezone)
863    }
864
865    fn udfs_and_timeunit() -> Vec<(Box<dyn ScalarUDFImpl>, TimeUnit)> {
866        let udfs: Vec<(Box<dyn ScalarUDFImpl>, TimeUnit)> = vec![
867            (
868                Box::new(ToTimestampFunc::new_with_config(&ConfigOptions::default())),
869                Nanosecond,
870            ),
871            (
872                Box::new(ToTimestampSecondsFunc::new_with_config(
873                    &ConfigOptions::default(),
874                )),
875                Second,
876            ),
877            (
878                Box::new(ToTimestampMillisFunc::new_with_config(
879                    &ConfigOptions::default(),
880                )),
881                Millisecond,
882            ),
883            (
884                Box::new(ToTimestampMicrosFunc::new_with_config(
885                    &ConfigOptions::default(),
886                )),
887                Microsecond,
888            ),
889            (
890                Box::new(ToTimestampNanosFunc::new_with_config(
891                    &ConfigOptions::default(),
892                )),
893                Nanosecond,
894            ),
895        ];
896        udfs
897    }
898
899    fn validate_expected_error(
900        options: &mut ConfigOptions,
901        args: ScalarFunctionArgs,
902        expected_err: &str,
903    ) {
904        let udfs = udfs_and_timeunit();
905
906        for (udf, _) in udfs {
907            match udf
908                .with_updated_config(options)
909                .unwrap()
910                .invoke_with_args(args.clone())
911            {
912                Ok(_) => panic!("Expected error but got success"),
913                Err(e) => {
914                    assert!(
915                        e.to_string().contains(expected_err),
916                        "Can not find expected error '{expected_err}'. Actual error '{e}'"
917                    );
918                }
919            }
920        }
921    }
922
923    #[test]
924    fn to_timestamp_arrays_and_nulls() -> Result<()> {
925        // ensure that arrow array implementation is wired up and handles nulls correctly
926
927        let mut string_builder = StringBuilder::with_capacity(2, 1024);
928        let mut ts_builder = TimestampNanosecondArray::builder(2);
929
930        string_builder.append_value("2020-09-08T13:42:29.190855");
931        ts_builder.append_value(1599572549190855000);
932
933        string_builder.append_null();
934        ts_builder.append_null();
935        let expected_timestamps = &ts_builder.finish() as &dyn Array;
936
937        let string_array =
938            ColumnarValue::Array(Arc::new(string_builder.finish()) as ArrayRef);
939        let parsed_timestamps = to_timestamp(&[string_array])
940            .expect("that to_timestamp parsed values without error");
941        if let ColumnarValue::Array(parsed_array) = parsed_timestamps {
942            assert_eq!(parsed_array.len(), 2);
943            assert_eq!(expected_timestamps, parsed_array.as_ref());
944        } else {
945            panic!("Expected a columnar array")
946        }
947        Ok(())
948    }
949
950    #[test]
951    fn to_timestamp_with_formats_arrays_and_nulls() -> Result<()> {
952        // ensure that arrow array implementation is wired up and handles nulls correctly
953
954        let mut date_string_builder = StringBuilder::with_capacity(2, 1024);
955        let mut format1_builder = StringBuilder::with_capacity(2, 1024);
956        let mut format2_builder = StringBuilder::with_capacity(2, 1024);
957        let mut format3_builder = StringBuilder::with_capacity(2, 1024);
958        let mut ts_builder = TimestampNanosecondArray::builder(2);
959
960        date_string_builder.append_null();
961        format1_builder.append_null();
962        format2_builder.append_null();
963        format3_builder.append_null();
964        ts_builder.append_null();
965
966        date_string_builder.append_value("2020-09-08T13:42:29.19085Z");
967        format1_builder.append_value("%s");
968        format2_builder.append_value("%c");
969        format3_builder.append_value("%+");
970        ts_builder.append_value(1599572549190850000);
971
972        let expected_timestamps = &ts_builder.finish() as &dyn Array;
973
974        let string_array = [
975            ColumnarValue::Array(Arc::new(date_string_builder.finish()) as ArrayRef),
976            ColumnarValue::Array(Arc::new(format1_builder.finish()) as ArrayRef),
977            ColumnarValue::Array(Arc::new(format2_builder.finish()) as ArrayRef),
978            ColumnarValue::Array(Arc::new(format3_builder.finish()) as ArrayRef),
979        ];
980        let parsed_timestamps = to_timestamp(&string_array)
981            .expect("that to_timestamp with format args parsed values without error");
982        if let ColumnarValue::Array(parsed_array) = parsed_timestamps {
983            assert_eq!(parsed_array.len(), 2);
984            assert_eq!(expected_timestamps, parsed_array.as_ref());
985        } else {
986            panic!("Expected a columnar array")
987        }
988        Ok(())
989    }
990
991    #[test]
992    fn to_timestamp_respects_execution_timezone() -> Result<()> {
993        let udfs = udfs_and_timeunit();
994
995        let mut options = ConfigOptions::default();
996        options.execution.time_zone = Some("-05:00".to_string());
997
998        let time_zone: Option<Arc<str>> = options
999            .execution
1000            .time_zone
1001            .as_ref()
1002            .map(|tz| Arc::from(tz.as_str()));
1003
1004        for (udf, time_unit) in udfs {
1005            let field = Field::new("arg", Utf8, true).into();
1006
1007            let args = ScalarFunctionArgs {
1008                args: vec![ColumnarValue::Scalar(ScalarValue::Utf8(Some(
1009                    "2020-09-08T13:42:29".to_string(),
1010                )))],
1011                arg_fields: vec![field],
1012                number_rows: 1,
1013                return_field: Field::new(
1014                    "f",
1015                    Timestamp(time_unit, Some("-05:00".into())),
1016                    true,
1017                )
1018                .into(),
1019                config_options: Arc::new(options.clone()),
1020            };
1021
1022            let result = udf
1023                .with_updated_config(&options.clone())
1024                .unwrap()
1025                .invoke_with_args(args)?;
1026            let result = match time_unit {
1027                Second => {
1028                    let ColumnarValue::Scalar(ScalarValue::TimestampSecond(
1029                        Some(value),
1030                        tz,
1031                    )) = result
1032                    else {
1033                        panic!("expected scalar timestamp");
1034                    };
1035
1036                    assert_eq!(tz, time_zone);
1037
1038                    value
1039                }
1040                Millisecond => {
1041                    let ColumnarValue::Scalar(ScalarValue::TimestampMillisecond(
1042                        Some(value),
1043                        tz,
1044                    )) = result
1045                    else {
1046                        panic!("expected scalar timestamp");
1047                    };
1048
1049                    assert_eq!(tz, time_zone);
1050
1051                    value
1052                }
1053                Microsecond => {
1054                    let ColumnarValue::Scalar(ScalarValue::TimestampMicrosecond(
1055                        Some(value),
1056                        tz,
1057                    )) = result
1058                    else {
1059                        panic!("expected scalar timestamp");
1060                    };
1061
1062                    assert_eq!(tz, time_zone);
1063
1064                    value
1065                }
1066                Nanosecond => {
1067                    let ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(
1068                        Some(value),
1069                        tz,
1070                    )) = result
1071                    else {
1072                        panic!("expected scalar timestamp");
1073                    };
1074
1075                    assert_eq!(tz, time_zone);
1076
1077                    value
1078                }
1079            };
1080
1081            let scale = match time_unit {
1082                Second => 1_000_000_000,
1083                Millisecond => 1_000_000,
1084                Microsecond => 1_000,
1085                Nanosecond => 1,
1086            };
1087
1088            let offset = FixedOffset::west_opt(5 * 3600).unwrap();
1089            let result = Some(
1090                DateTime::<Utc>::from_timestamp_nanos(result * scale)
1091                    .with_timezone(&offset)
1092                    .to_string(),
1093            );
1094
1095            assert_eq!(result, Some("2020-09-08 13:42:29 -05:00".to_string()));
1096        }
1097
1098        Ok(())
1099    }
1100
1101    #[test]
1102    fn to_timestamp_formats_respects_execution_timezone() -> Result<()> {
1103        let udfs = udfs_and_timeunit();
1104
1105        let mut options = ConfigOptions::default();
1106        options.execution.time_zone = Some("-05:00".to_string());
1107
1108        let time_zone: Option<Arc<str>> = options
1109            .execution
1110            .time_zone
1111            .as_ref()
1112            .map(|tz| Arc::from(tz.as_str()));
1113
1114        let expr_field = Field::new("arg", Utf8, true).into();
1115        let format_field: Arc<Field> = Field::new("fmt", Utf8, true).into();
1116
1117        for (udf, time_unit) in udfs {
1118            for (value, format, expected_str) in [
1119                (
1120                    "2020-09-08 09:42:29 -05:00",
1121                    "%Y-%m-%d %H:%M:%S %z",
1122                    Some("2020-09-08 09:42:29 -05:00"),
1123                ),
1124                (
1125                    "2020-09-08T13:42:29Z",
1126                    "%+",
1127                    Some("2020-09-08 08:42:29 -05:00"),
1128                ),
1129                (
1130                    "2020-09-08 13:42:29 UTC",
1131                    "%Y-%m-%d %H:%M:%S %Z",
1132                    Some("2020-09-08 08:42:29 -05:00"),
1133                ),
1134                (
1135                    "+0000 2024-01-01 12:00:00",
1136                    "%z %Y-%m-%d %H:%M:%S",
1137                    Some("2024-01-01 07:00:00 -05:00"),
1138                ),
1139                (
1140                    "20200908134229+0100",
1141                    "%Y%m%d%H%M%S%z",
1142                    Some("2020-09-08 07:42:29 -05:00"),
1143                ),
1144                (
1145                    "2020-09-08+0230 13:42",
1146                    "%Y-%m-%d%z %H:%M",
1147                    Some("2020-09-08 06:12:00 -05:00"),
1148                ),
1149            ] {
1150                let args = ScalarFunctionArgs {
1151                    args: vec![
1152                        ColumnarValue::Scalar(ScalarValue::Utf8(Some(value.to_string()))),
1153                        ColumnarValue::Scalar(ScalarValue::Utf8(Some(
1154                            format.to_string(),
1155                        ))),
1156                    ],
1157                    arg_fields: vec![Arc::clone(&expr_field), Arc::clone(&format_field)],
1158                    number_rows: 1,
1159                    return_field: Field::new(
1160                        "f",
1161                        Timestamp(time_unit, Some("-05:00".into())),
1162                        true,
1163                    )
1164                    .into(),
1165                    config_options: Arc::new(options.clone()),
1166                };
1167                let result = udf
1168                    .with_updated_config(&options.clone())
1169                    .unwrap()
1170                    .invoke_with_args(args)?;
1171                let result = match time_unit {
1172                    Second => {
1173                        let ColumnarValue::Scalar(ScalarValue::TimestampSecond(
1174                            Some(value),
1175                            tz,
1176                        )) = result
1177                        else {
1178                            panic!("expected scalar timestamp");
1179                        };
1180
1181                        assert_eq!(tz, time_zone);
1182
1183                        value
1184                    }
1185                    Millisecond => {
1186                        let ColumnarValue::Scalar(ScalarValue::TimestampMillisecond(
1187                            Some(value),
1188                            tz,
1189                        )) = result
1190                        else {
1191                            panic!("expected scalar timestamp");
1192                        };
1193
1194                        assert_eq!(tz, time_zone);
1195
1196                        value
1197                    }
1198                    Microsecond => {
1199                        let ColumnarValue::Scalar(ScalarValue::TimestampMicrosecond(
1200                            Some(value),
1201                            tz,
1202                        )) = result
1203                        else {
1204                            panic!("expected scalar timestamp");
1205                        };
1206
1207                        assert_eq!(tz, time_zone);
1208
1209                        value
1210                    }
1211                    Nanosecond => {
1212                        let ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(
1213                            Some(value),
1214                            tz,
1215                        )) = result
1216                        else {
1217                            panic!("expected scalar timestamp");
1218                        };
1219
1220                        assert_eq!(tz, time_zone);
1221
1222                        value
1223                    }
1224                };
1225
1226                let scale = match time_unit {
1227                    Second => 1_000_000_000,
1228                    Millisecond => 1_000_000,
1229                    Microsecond => 1_000,
1230                    Nanosecond => 1,
1231                };
1232                let offset = FixedOffset::west_opt(5 * 3600).unwrap();
1233                let result = Some(
1234                    DateTime::<Utc>::from_timestamp_nanos(result * scale)
1235                        .with_timezone(&offset)
1236                        .to_string(),
1237                );
1238
1239                assert_eq!(result, expected_str.map(|s| s.to_string()));
1240            }
1241        }
1242
1243        Ok(())
1244    }
1245
1246    #[test]
1247    fn to_timestamp_invalid_execution_timezone_behavior() -> Result<()> {
1248        let field: Arc<Field> = Field::new("arg", Utf8, true).into();
1249        let return_field: Arc<Field> =
1250            Field::new("f", Timestamp(Nanosecond, None), true).into();
1251
1252        let mut options = ConfigOptions::default();
1253        options.execution.time_zone = Some("Invalid/Timezone".to_string());
1254
1255        let args = ScalarFunctionArgs {
1256            args: vec![ColumnarValue::Scalar(ScalarValue::Utf8(Some(
1257                "2020-09-08T13:42:29Z".to_string(),
1258            )))],
1259            arg_fields: vec![Arc::clone(&field)],
1260            number_rows: 1,
1261            return_field: Arc::clone(&return_field),
1262            config_options: Arc::new(options.clone()),
1263        };
1264
1265        let expected_err =
1266            "Invalid timezone \"Invalid/Timezone\": failed to parse timezone";
1267
1268        validate_expected_error(&mut options, args, expected_err);
1269
1270        Ok(())
1271    }
1272
1273    #[test]
1274    fn to_timestamp_formats_invalid_execution_timezone_behavior() -> Result<()> {
1275        let expr_field: Arc<Field> = Field::new("arg", Utf8, true).into();
1276        let format_field: Arc<Field> = Field::new("fmt", Utf8, true).into();
1277        let return_field: Arc<Field> =
1278            Field::new("f", Timestamp(Nanosecond, None), true).into();
1279
1280        let mut options = ConfigOptions::default();
1281        options.execution.time_zone = Some("Invalid/Timezone".to_string());
1282
1283        let expected_err =
1284            "Invalid timezone \"Invalid/Timezone\": failed to parse timezone";
1285
1286        let make_args = |value: &str, format: &str| ScalarFunctionArgs {
1287            args: vec![
1288                ColumnarValue::Scalar(ScalarValue::Utf8(Some(value.to_string()))),
1289                ColumnarValue::Scalar(ScalarValue::Utf8(Some(format.to_string()))),
1290            ],
1291            arg_fields: vec![Arc::clone(&expr_field), Arc::clone(&format_field)],
1292            number_rows: 1,
1293            return_field: Arc::clone(&return_field),
1294            config_options: Arc::new(options.clone()),
1295        };
1296
1297        for (value, format, _expected_str) in [
1298            (
1299                "2020-09-08 09:42:29 -05:00",
1300                "%Y-%m-%d %H:%M:%S %z",
1301                Some("2020-09-08 09:42:29 -05:00"),
1302            ),
1303            (
1304                "2020-09-08T13:42:29Z",
1305                "%+",
1306                Some("2020-09-08 08:42:29 -05:00"),
1307            ),
1308            (
1309                "2020-09-08 13:42:29 +0000",
1310                "%Y-%m-%d %H:%M:%S %z",
1311                Some("2020-09-08 08:42:29 -05:00"),
1312            ),
1313            (
1314                "+0000 2024-01-01 12:00:00",
1315                "%z %Y-%m-%d %H:%M:%S",
1316                Some("2024-01-01 07:00:00 -05:00"),
1317            ),
1318            (
1319                "20200908134229+0100",
1320                "%Y%m%d%H%M%S%z",
1321                Some("2020-09-08 07:42:29 -05:00"),
1322            ),
1323            (
1324                "2020-09-08+0230 13:42",
1325                "%Y-%m-%d%z %H:%M",
1326                Some("2020-09-08 06:12:00 -05:00"),
1327            ),
1328        ] {
1329            let args = make_args(value, format);
1330            validate_expected_error(&mut options.clone(), args, expected_err);
1331        }
1332
1333        let args = ScalarFunctionArgs {
1334            args: vec![
1335                ColumnarValue::Scalar(ScalarValue::Utf8(Some(
1336                    "2020-09-08T13:42:29".to_string(),
1337                ))),
1338                ColumnarValue::Scalar(ScalarValue::Utf8(Some(
1339                    "%Y-%m-%dT%H:%M:%S".to_string(),
1340                ))),
1341            ],
1342            arg_fields: vec![Arc::clone(&expr_field), Arc::clone(&format_field)],
1343            number_rows: 1,
1344            return_field: Arc::clone(&return_field),
1345            config_options: Arc::new(options.clone()),
1346        };
1347
1348        validate_expected_error(&mut options.clone(), args, expected_err);
1349
1350        Ok(())
1351    }
1352
1353    #[test]
1354    fn to_timestamp_invalid_input_type() -> Result<()> {
1355        // pass the wrong type of input array to to_timestamp and test
1356        // that we get an error.
1357
1358        let mut builder = Int64Array::builder(1);
1359        builder.append_value(1);
1360        let int64array = ColumnarValue::Array(Arc::new(builder.finish()));
1361
1362        let expected_err =
1363            "Execution error: Unsupported data type Int64 for function to_timestamp";
1364        match to_timestamp(&[int64array]) {
1365            Ok(_) => panic!("Expected error but got success"),
1366            Err(e) => {
1367                assert!(
1368                    e.to_string().contains(expected_err),
1369                    "Can not find expected error '{expected_err}'. Actual error '{e}'"
1370                );
1371            }
1372        }
1373        Ok(())
1374    }
1375
1376    #[test]
1377    fn to_timestamp_with_formats_invalid_input_type() -> Result<()> {
1378        // pass the wrong type of input array to to_timestamp and test
1379        // that we get an error.
1380
1381        let mut builder = Int64Array::builder(1);
1382        builder.append_value(1);
1383        let int64array = [
1384            ColumnarValue::Array(Arc::new(builder.finish())),
1385            ColumnarValue::Array(Arc::new(builder.finish())),
1386        ];
1387
1388        let expected_err =
1389            "Execution error: Unsupported data type Int64 for function to_timestamp";
1390        match to_timestamp(&int64array) {
1391            Ok(_) => panic!("Expected error but got success"),
1392            Err(e) => {
1393                assert!(
1394                    e.to_string().contains(expected_err),
1395                    "Can not find expected error '{expected_err}'. Actual error '{e}'"
1396                );
1397            }
1398        }
1399        Ok(())
1400    }
1401
1402    #[test]
1403    fn to_timestamp_with_unparsable_data() -> Result<()> {
1404        let mut date_string_builder = StringBuilder::with_capacity(2, 1024);
1405
1406        date_string_builder.append_null();
1407
1408        date_string_builder.append_value("2020-09-08 - 13:42:29.19085Z");
1409
1410        let string_array =
1411            ColumnarValue::Array(Arc::new(date_string_builder.finish()) as ArrayRef);
1412
1413        let expected_err = "Arrow error: Parser error: Error parsing timestamp from '2020-09-08 - 13:42:29.19085Z': error parsing time";
1414        match to_timestamp(&[string_array]) {
1415            Ok(_) => panic!("Expected error but got success"),
1416            Err(e) => {
1417                assert!(
1418                    e.to_string().contains(expected_err),
1419                    "Can not find expected error '{expected_err}'. Actual error '{e}'"
1420                );
1421            }
1422        }
1423        Ok(())
1424    }
1425
1426    #[test]
1427    fn to_timestamp_with_invalid_tz() -> Result<()> {
1428        let mut date_string_builder = StringBuilder::with_capacity(2, 1024);
1429
1430        date_string_builder.append_null();
1431
1432        date_string_builder.append_value("2020-09-08T13:42:29ZZ");
1433
1434        let string_array =
1435            ColumnarValue::Array(Arc::new(date_string_builder.finish()) as ArrayRef);
1436
1437        let expected_err = "Arrow error: Parser error: Invalid timezone \"ZZ\": failed to parse timezone";
1438        match to_timestamp(&[string_array]) {
1439            Ok(_) => panic!("Expected error but got success"),
1440            Err(e) => {
1441                assert!(
1442                    e.to_string().contains(expected_err),
1443                    "Can not find expected error '{expected_err}'. Actual error '{e}'"
1444                );
1445            }
1446        }
1447        Ok(())
1448    }
1449
1450    #[test]
1451    fn to_timestamp_with_no_matching_formats() -> Result<()> {
1452        let mut date_string_builder = StringBuilder::with_capacity(2, 1024);
1453        let mut format1_builder = StringBuilder::with_capacity(2, 1024);
1454        let mut format2_builder = StringBuilder::with_capacity(2, 1024);
1455        let mut format3_builder = StringBuilder::with_capacity(2, 1024);
1456
1457        date_string_builder.append_null();
1458        format1_builder.append_null();
1459        format2_builder.append_null();
1460        format3_builder.append_null();
1461
1462        date_string_builder.append_value("2020-09-08T13:42:29.19085Z");
1463        format1_builder.append_value("%s");
1464        format2_builder.append_value("%c");
1465        format3_builder.append_value("%H:%M:%S");
1466
1467        let string_array = [
1468            ColumnarValue::Array(Arc::new(date_string_builder.finish()) as ArrayRef),
1469            ColumnarValue::Array(Arc::new(format1_builder.finish()) as ArrayRef),
1470            ColumnarValue::Array(Arc::new(format2_builder.finish()) as ArrayRef),
1471            ColumnarValue::Array(Arc::new(format3_builder.finish()) as ArrayRef),
1472        ];
1473
1474        let expected_err = "Execution error: Error parsing timestamp from '2020-09-08T13:42:29.19085Z' using format '%H:%M:%S': input contains invalid characters";
1475        match to_timestamp(&string_array) {
1476            Ok(_) => panic!("Expected error but got success"),
1477            Err(e) => {
1478                assert!(
1479                    e.to_string().contains(expected_err),
1480                    "Can not find expected error '{expected_err}'. Actual error '{e}'"
1481                );
1482            }
1483        }
1484        Ok(())
1485    }
1486
1487    #[test]
1488    fn string_to_timestamp_formatted() {
1489        // Explicit timezone
1490        assert_eq!(
1491            1599572549190855000,
1492            parse_timestamp_formatted("2020-09-08T13:42:29.190855+00:00", "%+").unwrap()
1493        );
1494        assert_eq!(
1495            1599572549190855000,
1496            parse_timestamp_formatted("2020-09-08T13:42:29.190855Z", "%+").unwrap()
1497        );
1498        assert_eq!(
1499            1599572549000000000,
1500            parse_timestamp_formatted("2020-09-08T13:42:29Z", "%+").unwrap()
1501        ); // no fractional part
1502        assert_eq!(
1503            1599590549190855000,
1504            parse_timestamp_formatted("2020-09-08T13:42:29.190855-05:00", "%+").unwrap()
1505        );
1506        assert_eq!(
1507            1599590549000000000,
1508            parse_timestamp_formatted("1599590549", "%s").unwrap()
1509        );
1510        assert_eq!(
1511            1599572549000000000,
1512            parse_timestamp_formatted("09-08-2020 13/42/29", "%m-%d-%Y %H/%M/%S")
1513                .unwrap()
1514        );
1515        assert_eq!(
1516            1642896000000000000,
1517            parse_timestamp_formatted("2022-01-23", "%Y-%m-%d").unwrap()
1518        );
1519    }
1520
1521    fn parse_timestamp_formatted(s: &str, format: &str) -> Result<i64, DataFusionError> {
1522        let result = string_to_timestamp_nanos_formatted_with_timezone(
1523            &Some("UTC".parse()?),
1524            s,
1525            format,
1526        );
1527        if let Err(e) = &result {
1528            eprintln!("Error parsing timestamp '{s}' using format '{format}': {e:?}");
1529        }
1530        result
1531    }
1532
1533    #[test]
1534    fn string_to_timestamp_formatted_invalid() {
1535        // Test parsing invalid formats
1536        let cases = [
1537            ("", "%Y%m%d %H%M%S", "premature end of input"),
1538            ("SS", "%c", "premature end of input"),
1539            ("Wed, 18 Feb 2015 23:16:09 GMT", "", "trailing input"),
1540            (
1541                "Wed, 18 Feb 2015 23:16:09 GMT",
1542                "%XX",
1543                "input contains invalid characters",
1544            ),
1545            (
1546                "Wed, 18 Feb 2015 23:16:09 GMT",
1547                "%Y%m%d %H%M%S",
1548                "input contains invalid characters",
1549            ),
1550        ];
1551
1552        for (s, f, ctx) in cases {
1553            let expected = format!(
1554                "Execution error: Error parsing timestamp from '{s}' using format '{f}': {ctx}"
1555            );
1556            let actual = string_to_datetime_formatted(&Utc, s, f)
1557                .unwrap_err()
1558                .strip_backtrace();
1559            assert_eq!(actual, expected)
1560        }
1561    }
1562
1563    #[test]
1564    fn string_to_timestamp_invalid_arguments() {
1565        // Test parsing invalid formats
1566        let cases = [
1567            ("", "%Y%m%d %H%M%S", "premature end of input"),
1568            ("SS", "%c", "premature end of input"),
1569            ("Wed, 18 Feb 2015 23:16:09 GMT", "", "trailing input"),
1570            (
1571                "Wed, 18 Feb 2015 23:16:09 GMT",
1572                "%XX",
1573                "input contains invalid characters",
1574            ),
1575            (
1576                "Wed, 18 Feb 2015 23:16:09 GMT",
1577                "%Y%m%d %H%M%S",
1578                "input contains invalid characters",
1579            ),
1580        ];
1581
1582        for (s, f, ctx) in cases {
1583            let expected = format!(
1584                "Execution error: Error parsing timestamp from '{s}' using format '{f}': {ctx}"
1585            );
1586            let actual = string_to_datetime_formatted(&Utc, s, f)
1587                .unwrap_err()
1588                .strip_backtrace();
1589            assert_eq!(actual, expected)
1590        }
1591    }
1592
1593    #[test]
1594    fn test_no_tz() {
1595        let udfs: Vec<Box<dyn ScalarUDFImpl>> = vec![
1596            Box::new(ToTimestampFunc::new_with_config(&ConfigOptions::default())),
1597            Box::new(ToTimestampSecondsFunc::new_with_config(
1598                &ConfigOptions::default(),
1599            )),
1600            Box::new(ToTimestampMillisFunc::new_with_config(
1601                &ConfigOptions::default(),
1602            )),
1603            Box::new(ToTimestampNanosFunc::new_with_config(
1604                &ConfigOptions::default(),
1605            )),
1606            Box::new(ToTimestampSecondsFunc::new_with_config(
1607                &ConfigOptions::default(),
1608            )),
1609        ];
1610
1611        let mut nanos_builder = TimestampNanosecondArray::builder(2);
1612        let mut millis_builder = TimestampMillisecondArray::builder(2);
1613        let mut micros_builder = TimestampMicrosecondArray::builder(2);
1614        let mut sec_builder = TimestampSecondArray::builder(2);
1615
1616        nanos_builder.append_value(1599572549190850000);
1617        millis_builder.append_value(1599572549190);
1618        micros_builder.append_value(1599572549190850);
1619        sec_builder.append_value(1599572549);
1620
1621        let nanos_timestamps =
1622            Arc::new(nanos_builder.finish().with_timezone("UTC")) as ArrayRef;
1623        let millis_timestamps =
1624            Arc::new(millis_builder.finish().with_timezone("UTC")) as ArrayRef;
1625        let micros_timestamps =
1626            Arc::new(micros_builder.finish().with_timezone("UTC")) as ArrayRef;
1627        let sec_timestamps =
1628            Arc::new(sec_builder.finish().with_timezone("UTC")) as ArrayRef;
1629
1630        let arrays = &[
1631            ColumnarValue::Array(Arc::clone(&nanos_timestamps)),
1632            ColumnarValue::Array(Arc::clone(&millis_timestamps)),
1633            ColumnarValue::Array(Arc::clone(&micros_timestamps)),
1634            ColumnarValue::Array(Arc::clone(&sec_timestamps)),
1635        ];
1636
1637        for udf in &udfs {
1638            for array in arrays {
1639                let rt = udf.return_type(&[array.data_type()]).unwrap();
1640                let arg_field = Field::new("arg", array.data_type().clone(), true).into();
1641                assert!(matches!(rt, Timestamp(_, None)));
1642                let args = ScalarFunctionArgs {
1643                    args: vec![array.clone()],
1644                    arg_fields: vec![arg_field],
1645                    number_rows: 4,
1646                    return_field: Field::new("f", rt, true).into(),
1647                    config_options: Arc::new(ConfigOptions::default()),
1648                };
1649                let res = udf
1650                    .invoke_with_args(args)
1651                    .expect("that to_timestamp parsed values without error");
1652                let array = match res {
1653                    ColumnarValue::Array(res) => res,
1654                    _ => panic!("Expected a columnar array"),
1655                };
1656                let ty = array.data_type();
1657                assert!(matches!(ty, Timestamp(_, None)));
1658            }
1659        }
1660
1661        let mut nanos_builder = TimestampNanosecondArray::builder(2);
1662        let mut millis_builder = TimestampMillisecondArray::builder(2);
1663        let mut micros_builder = TimestampMicrosecondArray::builder(2);
1664        let mut sec_builder = TimestampSecondArray::builder(2);
1665        let mut i64_builder = Int64Array::builder(2);
1666
1667        nanos_builder.append_value(1599572549190850000);
1668        millis_builder.append_value(1599572549190);
1669        micros_builder.append_value(1599572549190850);
1670        sec_builder.append_value(1599572549);
1671        i64_builder.append_value(1599572549);
1672
1673        let nanos_timestamps = Arc::new(nanos_builder.finish()) as ArrayRef;
1674        let millis_timestamps = Arc::new(millis_builder.finish()) as ArrayRef;
1675        let micros_timestamps = Arc::new(micros_builder.finish()) as ArrayRef;
1676        let sec_timestamps = Arc::new(sec_builder.finish()) as ArrayRef;
1677        let i64_timestamps = Arc::new(i64_builder.finish()) as ArrayRef;
1678
1679        let arrays = &[
1680            ColumnarValue::Array(Arc::clone(&nanos_timestamps)),
1681            ColumnarValue::Array(Arc::clone(&millis_timestamps)),
1682            ColumnarValue::Array(Arc::clone(&micros_timestamps)),
1683            ColumnarValue::Array(Arc::clone(&sec_timestamps)),
1684            ColumnarValue::Array(Arc::clone(&i64_timestamps)),
1685        ];
1686
1687        for udf in &udfs {
1688            for array in arrays {
1689                let rt = udf.return_type(&[array.data_type()]).unwrap();
1690                assert!(matches!(rt, Timestamp(_, None)));
1691                let arg_field = Field::new("arg", array.data_type().clone(), true).into();
1692                let args = ScalarFunctionArgs {
1693                    args: vec![array.clone()],
1694                    arg_fields: vec![arg_field],
1695                    number_rows: 5,
1696                    return_field: Field::new("f", rt, true).into(),
1697                    config_options: Arc::new(ConfigOptions::default()),
1698                };
1699                let res = udf
1700                    .invoke_with_args(args)
1701                    .expect("that to_timestamp parsed values without error");
1702                let array = match res {
1703                    ColumnarValue::Array(res) => res,
1704                    _ => panic!("Expected a columnar array"),
1705                };
1706                let ty = array.data_type();
1707                assert!(matches!(ty, Timestamp(_, None)));
1708            }
1709        }
1710    }
1711
1712    #[test]
1713    fn test_to_timestamp_arg_validation() {
1714        let mut date_string_builder = StringBuilder::with_capacity(2, 1024);
1715        date_string_builder.append_value("2020-09-08T13:42:29.19085Z");
1716
1717        let data = date_string_builder.finish();
1718
1719        let funcs: Vec<(ScalarFunctionImplementation, TimeUnit)> = vec![
1720            (Arc::new(to_timestamp), Nanosecond),
1721            (Arc::new(to_timestamp_micros), Microsecond),
1722            (Arc::new(to_timestamp_millis), Millisecond),
1723            (Arc::new(to_timestamp_nanos), Nanosecond),
1724            (Arc::new(to_timestamp_seconds), Second),
1725        ];
1726
1727        let mut nanos_builder = TimestampNanosecondArray::builder(2);
1728        let mut millis_builder = TimestampMillisecondArray::builder(2);
1729        let mut micros_builder = TimestampMicrosecondArray::builder(2);
1730        let mut sec_builder = TimestampSecondArray::builder(2);
1731
1732        nanos_builder.append_value(1599572549190850000);
1733        millis_builder.append_value(1599572549190);
1734        micros_builder.append_value(1599572549190850);
1735        sec_builder.append_value(1599572549);
1736
1737        let nanos_expected_timestamps = &nanos_builder.finish() as &dyn Array;
1738        let millis_expected_timestamps = &millis_builder.finish() as &dyn Array;
1739        let micros_expected_timestamps = &micros_builder.finish() as &dyn Array;
1740        let sec_expected_timestamps = &sec_builder.finish() as &dyn Array;
1741
1742        for (func, time_unit) in funcs {
1743            // test UTF8
1744            let string_array = [
1745                ColumnarValue::Array(Arc::new(data.clone()) as ArrayRef),
1746                ColumnarValue::Scalar(ScalarValue::Utf8(Some("%s".to_string()))),
1747                ColumnarValue::Scalar(ScalarValue::Utf8(Some("%c".to_string()))),
1748                ColumnarValue::Scalar(ScalarValue::Utf8(Some("%+".to_string()))),
1749            ];
1750            let parsed_timestamps = func(&string_array)
1751                .expect("that to_timestamp with format args parsed values without error");
1752            if let ColumnarValue::Array(parsed_array) = parsed_timestamps {
1753                assert_eq!(parsed_array.len(), 1);
1754                match time_unit {
1755                    Nanosecond => {
1756                        assert_eq!(nanos_expected_timestamps, parsed_array.as_ref())
1757                    }
1758                    Millisecond => {
1759                        assert_eq!(millis_expected_timestamps, parsed_array.as_ref())
1760                    }
1761                    Microsecond => {
1762                        assert_eq!(micros_expected_timestamps, parsed_array.as_ref())
1763                    }
1764                    Second => {
1765                        assert_eq!(sec_expected_timestamps, parsed_array.as_ref())
1766                    }
1767                };
1768            } else {
1769                panic!("Expected a columnar array")
1770            }
1771
1772            // test LargeUTF8
1773            let string_array = [
1774                ColumnarValue::Array(Arc::new(data.clone()) as ArrayRef),
1775                ColumnarValue::Scalar(ScalarValue::LargeUtf8(Some("%s".to_string()))),
1776                ColumnarValue::Scalar(ScalarValue::LargeUtf8(Some("%c".to_string()))),
1777                ColumnarValue::Scalar(ScalarValue::LargeUtf8(Some("%+".to_string()))),
1778            ];
1779            let parsed_timestamps = func(&string_array)
1780                .expect("that to_timestamp with format args parsed values without error");
1781            if let ColumnarValue::Array(parsed_array) = parsed_timestamps {
1782                assert_eq!(parsed_array.len(), 1);
1783                assert!(matches!(parsed_array.data_type(), Timestamp(_, None)));
1784
1785                match time_unit {
1786                    Nanosecond => {
1787                        assert_eq!(nanos_expected_timestamps, parsed_array.as_ref())
1788                    }
1789                    Millisecond => {
1790                        assert_eq!(millis_expected_timestamps, parsed_array.as_ref())
1791                    }
1792                    Microsecond => {
1793                        assert_eq!(micros_expected_timestamps, parsed_array.as_ref())
1794                    }
1795                    Second => {
1796                        assert_eq!(sec_expected_timestamps, parsed_array.as_ref())
1797                    }
1798                };
1799            } else {
1800                panic!("Expected a columnar array")
1801            }
1802
1803            // test other types
1804            let string_array = [
1805                ColumnarValue::Array(Arc::new(data.clone()) as ArrayRef),
1806                ColumnarValue::Scalar(ScalarValue::Int32(Some(1))),
1807                ColumnarValue::Scalar(ScalarValue::Int32(Some(2))),
1808                ColumnarValue::Scalar(ScalarValue::Int32(Some(3))),
1809            ];
1810
1811            let expected = "Unsupported data type Int32 for function".to_string();
1812            let actual = func(&string_array).unwrap_err().to_string();
1813            assert_contains!(actual, expected);
1814
1815            // test other types
1816            let string_array = [
1817                ColumnarValue::Array(Arc::new(data.clone()) as ArrayRef),
1818                ColumnarValue::Array(Arc::new(PrimitiveArray::<Int64Type>::new(
1819                    vec![1i64].into(),
1820                    None,
1821                )) as ArrayRef),
1822            ];
1823
1824            let expected = "Unsupported data type".to_string();
1825            let actual = func(&string_array).unwrap_err().to_string();
1826            assert_contains!(actual, expected);
1827        }
1828    }
1829
1830    #[test]
1831    fn test_decimal_to_nanoseconds_negative_scale() {
1832        // scale -2: internal value 5 represents 5 * 10^2 = 500 seconds
1833        let nanos = decimal_to_nanoseconds(5, -2);
1834        assert_eq!(nanos, 500_000_000_000); // 500 seconds in nanoseconds
1835
1836        // scale -1: internal value 10 represents 10 * 10^1 = 100 seconds
1837        let nanos = decimal_to_nanoseconds(10, -1);
1838        assert_eq!(nanos, 100_000_000_000);
1839
1840        // scale 0: internal value 5 represents 5 seconds
1841        let nanos = decimal_to_nanoseconds(5, 0);
1842        assert_eq!(nanos, 5_000_000_000);
1843
1844        // scale 3: internal value 1500 represents 1.5 seconds
1845        let nanos = decimal_to_nanoseconds(1500, 3);
1846        assert_eq!(nanos, 1_500_000_000);
1847    }
1848}