1use std::any::Any;
19use std::sync::Arc;
20
21use crate::datetime::common::*;
22use arrow::array::Float64Array;
23use arrow::array::timezone::Tz;
24use arrow::datatypes::DataType::*;
25use arrow::datatypes::TimeUnit::{Microsecond, Millisecond, Nanosecond, Second};
26use arrow::datatypes::{
27 ArrowTimestampType, DataType, TimestampMicrosecondType, TimestampMillisecondType,
28 TimestampNanosecondType, TimestampSecondType,
29};
30use datafusion_common::config::ConfigOptions;
31use datafusion_common::format::DEFAULT_CAST_OPTIONS;
32use datafusion_common::{Result, ScalarType, ScalarValue, exec_err};
33use datafusion_expr::{
34 ColumnarValue, Documentation, ScalarUDF, ScalarUDFImpl, Signature, Volatility,
35};
36use datafusion_macros::user_doc;
37
38#[user_doc(
39 doc_section(label = "Time and Date Functions"),
40 description = r#"
41Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000000<TZ>`) in the session time zone. Supports strings,
42integer, unsigned integer, and double types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00')
43if no [Chrono formats] are provided. Strings that parse without a time zone are treated as if they are in the
44session time zone, or UTC if no session time zone is set.
45Integers, unsigned integers, and doubles are interpreted as seconds since the unix epoch (`1970-01-01T00:00:00Z`).
46
47Note: `to_timestamp` returns `Timestamp(ns, TimeZone)` where the time zone is the session time zone. The supported range
48for integer input is between`-9223372037` and `9223372036`. Supported range for string input is between
49`1677-09-21T00:12:44.0` and `2262-04-11T23:47:16.0`. Please use `to_timestamp_seconds`
50for the input outside of supported bounds.
51
52The session time zone can be set using the statement `SET TIMEZONE = 'desired time zone'`.
53The time zone can be a value like +00:00, 'Europe/London' etc.
54"#,
55 syntax_example = "to_timestamp(expression[, ..., format_n])",
56 sql_example = r#"```sql
57> select to_timestamp('2023-01-31T09:26:56.123456789-05:00');
58+-----------------------------------------------------------+
59| to_timestamp(Utf8("2023-01-31T09:26:56.123456789-05:00")) |
60+-----------------------------------------------------------+
61| 2023-01-31T14:26:56.123456789 |
62+-----------------------------------------------------------+
63> select to_timestamp('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y');
64+--------------------------------------------------------------------------------------------------------+
65| to_timestamp(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) |
66+--------------------------------------------------------------------------------------------------------+
67| 2023-05-17T03:59:00.123456789 |
68+--------------------------------------------------------------------------------------------------------+
69```
70Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/builtin_functions/date_time.rs)
71"#,
72 argument(
73 name = "expression",
74 description = "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators."
75 ),
76 argument(
77 name = "format_n",
78 description = r#"
79Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression.
80Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully
81parse the expression an error will be returned. Note: parsing of named timezones (e.g. 'America/New_York') using %Z is
82only supported at the end of the string preceded by a space.
83"#
84 )
85)]
86#[derive(Debug, PartialEq, Eq, Hash)]
87pub struct ToTimestampFunc {
88 signature: Signature,
89 timezone: Option<Arc<str>>,
90}
91
92#[user_doc(
93 doc_section(label = "Time and Date Functions"),
94 description = r#"
95Converts a value to a timestamp (`YYYY-MM-DDT00:00:00<TZ>`) in the session time zone. Supports strings,
96integer, unsigned integer, and double types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00')
97if no [Chrono formats] are provided. Strings that parse without a time zone are treated as if they are in the
98session time zone, or UTC if no session time zone is set.
99Integers, unsigned integers, and doubles are interpreted as seconds since the unix epoch (`1970-01-01T00:00:00Z`).
100
101The session time zone can be set using the statement `SET TIMEZONE = 'desired time zone'`.
102The time zone can be a value like +00:00, 'Europe/London' etc.
103"#,
104 syntax_example = "to_timestamp_seconds(expression[, ..., format_n])",
105 sql_example = r#"```sql
106> select to_timestamp_seconds('2023-01-31T09:26:56.123456789-05:00');
107+-------------------------------------------------------------------+
108| to_timestamp_seconds(Utf8("2023-01-31T09:26:56.123456789-05:00")) |
109+-------------------------------------------------------------------+
110| 2023-01-31T14:26:56 |
111+-------------------------------------------------------------------+
112> select to_timestamp_seconds('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y');
113+----------------------------------------------------------------------------------------------------------------+
114| to_timestamp_seconds(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) |
115+----------------------------------------------------------------------------------------------------------------+
116| 2023-05-17T03:59:00 |
117+----------------------------------------------------------------------------------------------------------------+
118```
119Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/builtin_functions/date_time.rs)
120"#,
121 argument(
122 name = "expression",
123 description = "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators."
124 ),
125 argument(
126 name = "format_n",
127 description = r#"
128Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression.
129Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully
130parse the expression an error will be returned. Note: parsing of named timezones (e.g. 'America/New_York') using %Z is
131only supported at the end of the string preceded by a space.
132"#
133 )
134)]
135#[derive(Debug, PartialEq, Eq, Hash)]
136pub struct ToTimestampSecondsFunc {
137 signature: Signature,
138 timezone: Option<Arc<str>>,
139}
140
141#[user_doc(
142 doc_section(label = "Time and Date Functions"),
143 description = r#"
144Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000<TZ>`) in the session time zone. Supports strings,
145integer, unsigned integer, and double types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00')
146if no [Chrono formats] are provided. Strings that parse without a time zone are treated as if they are in the
147session time zone, or UTC if no session time zone is set.
148Integers, unsigned integers, and doubles are interpreted as milliseconds since the unix epoch (`1970-01-01T00:00:00Z`).
149
150The session time zone can be set using the statement `SET TIMEZONE = 'desired time zone'`.
151The time zone can be a value like +00:00, 'Europe/London' etc.
152"#,
153 syntax_example = "to_timestamp_millis(expression[, ..., format_n])",
154 sql_example = r#"```sql
155> select to_timestamp_millis('2023-01-31T09:26:56.123456789-05:00');
156+------------------------------------------------------------------+
157| to_timestamp_millis(Utf8("2023-01-31T09:26:56.123456789-05:00")) |
158+------------------------------------------------------------------+
159| 2023-01-31T14:26:56.123 |
160+------------------------------------------------------------------+
161> select to_timestamp_millis('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y');
162+---------------------------------------------------------------------------------------------------------------+
163| to_timestamp_millis(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) |
164+---------------------------------------------------------------------------------------------------------------+
165| 2023-05-17T03:59:00.123 |
166+---------------------------------------------------------------------------------------------------------------+
167```
168Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/builtin_functions/date_time.rs)
169"#,
170 argument(
171 name = "expression",
172 description = "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators."
173 ),
174 argument(
175 name = "format_n",
176 description = r#"
177Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression.
178Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully
179parse the expression an error will be returned. Note: parsing of named timezones (e.g. 'America/New_York') using %Z is
180only supported at the end of the string preceded by a space.
181"#
182 )
183)]
184#[derive(Debug, PartialEq, Eq, Hash)]
185pub struct ToTimestampMillisFunc {
186 signature: Signature,
187 timezone: Option<Arc<str>>,
188}
189
190#[user_doc(
191 doc_section(label = "Time and Date Functions"),
192 description = r#"
193Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000000<TZ>`) in the session time zone. Supports strings,
194integer, unsigned integer, and double types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00')
195if no [Chrono formats] are provided. Strings that parse without a time zone are treated as if they are in the
196session time zone, or UTC if no session time zone is set.
197Integers, unsigned integers, and doubles are interpreted as microseconds since the unix epoch (`1970-01-01T00:00:00Z`).
198
199The session time zone can be set using the statement `SET TIMEZONE = 'desired time zone'`.
200The time zone can be a value like +00:00, 'Europe/London' etc.
201"#,
202 syntax_example = "to_timestamp_micros(expression[, ..., format_n])",
203 sql_example = r#"```sql
204> select to_timestamp_micros('2023-01-31T09:26:56.123456789-05:00');
205+------------------------------------------------------------------+
206| to_timestamp_micros(Utf8("2023-01-31T09:26:56.123456789-05:00")) |
207+------------------------------------------------------------------+
208| 2023-01-31T14:26:56.123456 |
209+------------------------------------------------------------------+
210> select to_timestamp_micros('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y');
211+---------------------------------------------------------------------------------------------------------------+
212| to_timestamp_micros(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) |
213+---------------------------------------------------------------------------------------------------------------+
214| 2023-05-17T03:59:00.123456 |
215+---------------------------------------------------------------------------------------------------------------+
216```
217Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/builtin_functions/date_time.rs)
218"#,
219 argument(
220 name = "expression",
221 description = "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators."
222 ),
223 argument(
224 name = "format_n",
225 description = r#"
226Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression.
227Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully
228parse the expression an error will be returned. Note: parsing of named timezones (e.g. 'America/New_York') using %Z is
229only supported at the end of the string preceded by a space.
230"#
231 )
232)]
233#[derive(Debug, PartialEq, Eq, Hash)]
234pub struct ToTimestampMicrosFunc {
235 signature: Signature,
236 timezone: Option<Arc<str>>,
237}
238
239#[user_doc(
240 doc_section(label = "Time and Date Functions"),
241 description = r#"
242Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000000000<TZ>`) in the session time zone. Supports strings,
243integer, unsigned integer, and double types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00')
244if no [Chrono formats] are provided. Strings that parse without a time zone are treated as if they are in the
245session time zone. Integers, unsigned integers, and doubles are interpreted as nanoseconds since the unix epoch (`1970-01-01T00:00:00Z`).
246
247The session time zone can be set using the statement `SET TIMEZONE = 'desired time zone'`.
248The time zone can be a value like +00:00, 'Europe/London' etc.
249"#,
250 syntax_example = "to_timestamp_nanos(expression[, ..., format_n])",
251 sql_example = r#"```sql
252> select to_timestamp_nanos('2023-01-31T09:26:56.123456789-05:00');
253+-----------------------------------------------------------------+
254| to_timestamp_nanos(Utf8("2023-01-31T09:26:56.123456789-05:00")) |
255+-----------------------------------------------------------------+
256| 2023-01-31T14:26:56.123456789 |
257+-----------------------------------------------------------------+
258> select to_timestamp_nanos('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y');
259+--------------------------------------------------------------------------------------------------------------+
260| to_timestamp_nanos(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) |
261+--------------------------------------------------------------------------------------------------------------+
262| 2023-05-17T03:59:00.123456789 |
263+---------------------------------------------------------------------------------------------------------------+
264```
265Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/builtin_functions/date_time.rs)
266"#,
267 argument(
268 name = "expression",
269 description = "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators."
270 ),
271 argument(
272 name = "format_n",
273 description = r#"
274Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression.
275Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully
276parse the expression an error will be returned. Note: parsing of named timezones (e.g. 'America/New_York') using %Z is
277only supported at the end of the string preceded by a space.
278"#
279 )
280)]
281#[derive(Debug, PartialEq, Eq, Hash)]
282pub struct ToTimestampNanosFunc {
283 signature: Signature,
284 timezone: Option<Arc<str>>,
285}
286
287macro_rules! impl_to_timestamp_constructors {
290 ($func:ty) => {
291 impl Default for $func {
292 fn default() -> Self {
293 Self::new_with_config(&ConfigOptions::default())
294 }
295 }
296
297 impl $func {
298 #[deprecated(since = "52.0.0", note = "use `new_with_config` instead")]
299 pub fn new() -> Self {
305 Self::new_with_config(&ConfigOptions::default())
306 }
307
308 pub fn new_with_config(config: &ConfigOptions) -> Self {
309 Self {
310 signature: Signature::variadic_any(Volatility::Immutable),
311 timezone: config
312 .execution
313 .time_zone
314 .as_ref()
315 .map(|tz| Arc::from(tz.as_str())),
316 }
317 }
318 }
319 };
320}
321
322impl_to_timestamp_constructors!(ToTimestampFunc);
323impl_to_timestamp_constructors!(ToTimestampSecondsFunc);
324impl_to_timestamp_constructors!(ToTimestampMillisFunc);
325impl_to_timestamp_constructors!(ToTimestampMicrosFunc);
326impl_to_timestamp_constructors!(ToTimestampNanosFunc);
327
328macro_rules! impl_with_updated_config {
336 () => {
337 fn with_updated_config(&self, config: &ConfigOptions) -> Option<ScalarUDF> {
338 Some(Self::new_with_config(config).into())
339 }
340 };
341}
342
343impl ScalarUDFImpl for ToTimestampFunc {
344 fn as_any(&self) -> &dyn Any {
345 self
346 }
347
348 fn name(&self) -> &str {
349 "to_timestamp"
350 }
351
352 fn signature(&self) -> &Signature {
353 &self.signature
354 }
355
356 fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
357 Ok(Timestamp(Nanosecond, self.timezone.clone()))
358 }
359
360 impl_with_updated_config!();
361
362 fn invoke_with_args(
363 &self,
364 args: datafusion_expr::ScalarFunctionArgs,
365 ) -> Result<ColumnarValue> {
366 let datafusion_expr::ScalarFunctionArgs { args, .. } = args;
367
368 if args.is_empty() {
369 return exec_err!(
370 "to_timestamp function requires 1 or more arguments, got {}",
371 args.len()
372 );
373 }
374
375 if args.len() > 1 {
377 validate_data_types(&args, "to_timestamp")?;
378 }
379
380 let tz = self.timezone.clone();
381
382 match args[0].data_type() {
383 Int32 | Int64 => args[0]
384 .cast_to(&Timestamp(Second, None), None)?
385 .cast_to(&Timestamp(Nanosecond, tz), None),
386 Null | Timestamp(_, _) => args[0].cast_to(&Timestamp(Nanosecond, tz), None),
387 Float64 => {
388 let rescaled = arrow::compute::kernels::numeric::mul(
389 &args[0].to_array(1)?,
390 &arrow::array::Scalar::new(Float64Array::from(vec![
391 1_000_000_000f64,
392 ])),
393 )?;
394 Ok(ColumnarValue::Array(arrow::compute::cast_with_options(
395 &rescaled,
396 &Timestamp(Nanosecond, tz),
397 &DEFAULT_CAST_OPTIONS,
398 )?))
399 }
400 Utf8View | LargeUtf8 | Utf8 => {
401 to_timestamp_impl::<TimestampNanosecondType>(&args, "to_timestamp", &tz)
402 }
403 Decimal128(_, _) => {
404 match &args[0] {
405 ColumnarValue::Scalar(ScalarValue::Decimal128(
406 Some(value),
407 _,
408 scale,
409 )) => {
410 let scale_factor = 10_i128.pow(*scale as u32);
412 let seconds = value / scale_factor;
413 let fraction = value % scale_factor;
414 let nanos = (fraction * 1_000_000_000) / scale_factor;
415 let timestamp_nanos = seconds * 1_000_000_000 + nanos;
416
417 Ok(ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(
418 Some(timestamp_nanos as i64),
419 tz,
420 )))
421 }
422 _ => exec_err!("Invalid decimal value"),
423 }
424 }
425 other => {
426 exec_err!("Unsupported data type {other} for function to_timestamp")
427 }
428 }
429 }
430
431 fn documentation(&self) -> Option<&Documentation> {
432 self.doc()
433 }
434}
435
436impl ScalarUDFImpl for ToTimestampSecondsFunc {
437 fn as_any(&self) -> &dyn Any {
438 self
439 }
440
441 fn name(&self) -> &str {
442 "to_timestamp_seconds"
443 }
444
445 fn signature(&self) -> &Signature {
446 &self.signature
447 }
448
449 fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
450 Ok(Timestamp(Second, self.timezone.clone()))
451 }
452
453 impl_with_updated_config!();
454
455 fn invoke_with_args(
456 &self,
457 args: datafusion_expr::ScalarFunctionArgs,
458 ) -> Result<ColumnarValue> {
459 let datafusion_expr::ScalarFunctionArgs { args, .. } = args;
460
461 if args.is_empty() {
462 return exec_err!(
463 "to_timestamp_seconds function requires 1 or more arguments, got {}",
464 args.len()
465 );
466 }
467
468 if args.len() > 1 {
470 validate_data_types(&args, "to_timestamp")?;
471 }
472
473 let tz = self.timezone.clone();
474
475 match args[0].data_type() {
476 Null | Int32 | Int64 | Timestamp(_, _) | Decimal128(_, _) => {
477 args[0].cast_to(&Timestamp(Second, tz), None)
478 }
479 Utf8View | LargeUtf8 | Utf8 => to_timestamp_impl::<TimestampSecondType>(
480 &args,
481 "to_timestamp_seconds",
482 &self.timezone,
483 ),
484 other => {
485 exec_err!(
486 "Unsupported data type {} for function to_timestamp_seconds",
487 other
488 )
489 }
490 }
491 }
492
493 fn documentation(&self) -> Option<&Documentation> {
494 self.doc()
495 }
496}
497
498impl ScalarUDFImpl for ToTimestampMillisFunc {
499 fn as_any(&self) -> &dyn Any {
500 self
501 }
502
503 fn name(&self) -> &str {
504 "to_timestamp_millis"
505 }
506
507 fn signature(&self) -> &Signature {
508 &self.signature
509 }
510
511 fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
512 Ok(Timestamp(Millisecond, self.timezone.clone()))
513 }
514
515 impl_with_updated_config!();
516
517 fn invoke_with_args(
518 &self,
519 args: datafusion_expr::ScalarFunctionArgs,
520 ) -> Result<ColumnarValue> {
521 let datafusion_expr::ScalarFunctionArgs { args, .. } = args;
522
523 if args.is_empty() {
524 return exec_err!(
525 "to_timestamp_millis function requires 1 or more arguments, got {}",
526 args.len()
527 );
528 }
529
530 if args.len() > 1 {
532 validate_data_types(&args, "to_timestamp")?;
533 }
534
535 match args[0].data_type() {
536 Null | Int32 | Int64 | Timestamp(_, _) => {
537 args[0].cast_to(&Timestamp(Millisecond, self.timezone.clone()), None)
538 }
539 Utf8View | LargeUtf8 | Utf8 => to_timestamp_impl::<TimestampMillisecondType>(
540 &args,
541 "to_timestamp_millis",
542 &self.timezone,
543 ),
544 other => {
545 exec_err!(
546 "Unsupported data type {} for function to_timestamp_millis",
547 other
548 )
549 }
550 }
551 }
552
553 fn documentation(&self) -> Option<&Documentation> {
554 self.doc()
555 }
556}
557
558impl ScalarUDFImpl for ToTimestampMicrosFunc {
559 fn as_any(&self) -> &dyn Any {
560 self
561 }
562
563 fn name(&self) -> &str {
564 "to_timestamp_micros"
565 }
566
567 fn signature(&self) -> &Signature {
568 &self.signature
569 }
570
571 fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
572 Ok(Timestamp(Microsecond, self.timezone.clone()))
573 }
574
575 impl_with_updated_config!();
576
577 fn invoke_with_args(
578 &self,
579 args: datafusion_expr::ScalarFunctionArgs,
580 ) -> Result<ColumnarValue> {
581 let datafusion_expr::ScalarFunctionArgs { args, .. } = args;
582
583 if args.is_empty() {
584 return exec_err!(
585 "to_timestamp_micros function requires 1 or more arguments, got {}",
586 args.len()
587 );
588 }
589
590 if args.len() > 1 {
592 validate_data_types(&args, "to_timestamp")?;
593 }
594
595 match args[0].data_type() {
596 Null | Int32 | Int64 | Timestamp(_, _) => {
597 args[0].cast_to(&Timestamp(Microsecond, self.timezone.clone()), None)
598 }
599 Utf8View | LargeUtf8 | Utf8 => to_timestamp_impl::<TimestampMicrosecondType>(
600 &args,
601 "to_timestamp_micros",
602 &self.timezone,
603 ),
604 other => {
605 exec_err!(
606 "Unsupported data type {} for function to_timestamp_micros",
607 other
608 )
609 }
610 }
611 }
612
613 fn documentation(&self) -> Option<&Documentation> {
614 self.doc()
615 }
616}
617
618impl ScalarUDFImpl for ToTimestampNanosFunc {
619 fn as_any(&self) -> &dyn Any {
620 self
621 }
622
623 fn name(&self) -> &str {
624 "to_timestamp_nanos"
625 }
626
627 fn signature(&self) -> &Signature {
628 &self.signature
629 }
630
631 fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
632 Ok(Timestamp(Nanosecond, self.timezone.clone()))
633 }
634
635 impl_with_updated_config!();
636
637 fn invoke_with_args(
638 &self,
639 args: datafusion_expr::ScalarFunctionArgs,
640 ) -> Result<ColumnarValue> {
641 let datafusion_expr::ScalarFunctionArgs { args, .. } = args;
642
643 if args.is_empty() {
644 return exec_err!(
645 "to_timestamp_nanos function requires 1 or more arguments, got {}",
646 args.len()
647 );
648 }
649
650 if args.len() > 1 {
652 validate_data_types(&args, "to_timestamp")?;
653 }
654
655 match args[0].data_type() {
656 Null | Int32 | Int64 | Timestamp(_, _) => {
657 args[0].cast_to(&Timestamp(Nanosecond, self.timezone.clone()), None)
658 }
659 Utf8View | LargeUtf8 | Utf8 => to_timestamp_impl::<TimestampNanosecondType>(
660 &args,
661 "to_timestamp_nanos",
662 &self.timezone,
663 ),
664 other => {
665 exec_err!(
666 "Unsupported data type {} for function to_timestamp_nanos",
667 other
668 )
669 }
670 }
671 }
672
673 fn documentation(&self) -> Option<&Documentation> {
674 self.doc()
675 }
676}
677
678fn to_timestamp_impl<T: ArrowTimestampType + ScalarType<i64>>(
679 args: &[ColumnarValue],
680 name: &str,
681 timezone: &Option<Arc<str>>,
682) -> Result<ColumnarValue> {
683 let factor = match T::UNIT {
684 Second => 1_000_000_000,
685 Millisecond => 1_000_000,
686 Microsecond => 1_000,
687 Nanosecond => 1,
688 };
689
690 let tz = match timezone.clone() {
691 Some(tz) => Some(tz.parse::<Tz>()?),
692 None => None,
693 };
694
695 match args.len() {
696 1 => handle::<T, _>(
697 args,
698 move |s| string_to_timestamp_nanos_with_timezone(&tz, s).map(|n| n / factor),
699 name,
700 &Timestamp(T::UNIT, timezone.clone()),
701 ),
702 n if n >= 2 => handle_multiple::<T, _, _>(
703 args,
704 move |s, format| {
705 string_to_timestamp_nanos_formatted_with_timezone(&tz, s, format)
706 },
707 |n| n / factor,
708 name,
709 &Timestamp(T::UNIT, timezone.clone()),
710 ),
711 _ => exec_err!("Unsupported 0 argument count for function {name}"),
712 }
713}
714
715#[cfg(test)]
716mod tests {
717 use std::sync::Arc;
718
719 use arrow::array::types::Int64Type;
720 use arrow::array::{
721 Array, PrimitiveArray, TimestampMicrosecondArray, TimestampMillisecondArray,
722 TimestampNanosecondArray, TimestampSecondArray,
723 };
724 use arrow::array::{ArrayRef, Int64Array, StringBuilder};
725 use arrow::datatypes::{Field, TimeUnit};
726 use chrono::{DateTime, FixedOffset, Utc};
727 use datafusion_common::config::ConfigOptions;
728 use datafusion_common::{DataFusionError, ScalarValue, assert_contains};
729 use datafusion_expr::{ScalarFunctionArgs, ScalarFunctionImplementation};
730
731 use super::*;
732
733 fn to_timestamp(args: &[ColumnarValue]) -> Result<ColumnarValue> {
734 let timezone: Option<Arc<str>> = Some("UTC".into());
735 to_timestamp_impl::<TimestampNanosecondType>(args, "to_timestamp", &timezone)
736 }
737
738 fn to_timestamp_millis(args: &[ColumnarValue]) -> Result<ColumnarValue> {
740 let timezone: Option<Arc<str>> = Some("UTC".into());
741 to_timestamp_impl::<TimestampMillisecondType>(
742 args,
743 "to_timestamp_millis",
744 &timezone,
745 )
746 }
747
748 fn to_timestamp_micros(args: &[ColumnarValue]) -> Result<ColumnarValue> {
750 let timezone: Option<Arc<str>> = Some("UTC".into());
751 to_timestamp_impl::<TimestampMicrosecondType>(
752 args,
753 "to_timestamp_micros",
754 &timezone,
755 )
756 }
757
758 fn to_timestamp_nanos(args: &[ColumnarValue]) -> Result<ColumnarValue> {
760 let timezone: Option<Arc<str>> = Some("UTC".into());
761 to_timestamp_impl::<TimestampNanosecondType>(
762 args,
763 "to_timestamp_nanos",
764 &timezone,
765 )
766 }
767
768 fn to_timestamp_seconds(args: &[ColumnarValue]) -> Result<ColumnarValue> {
770 let timezone: Option<Arc<str>> = Some("UTC".into());
771 to_timestamp_impl::<TimestampSecondType>(args, "to_timestamp_seconds", &timezone)
772 }
773
774 fn udfs_and_timeunit() -> Vec<(Box<dyn ScalarUDFImpl>, TimeUnit)> {
775 let udfs: Vec<(Box<dyn ScalarUDFImpl>, TimeUnit)> = vec![
776 (
777 Box::new(ToTimestampFunc::new_with_config(&ConfigOptions::default())),
778 Nanosecond,
779 ),
780 (
781 Box::new(ToTimestampSecondsFunc::new_with_config(
782 &ConfigOptions::default(),
783 )),
784 Second,
785 ),
786 (
787 Box::new(ToTimestampMillisFunc::new_with_config(
788 &ConfigOptions::default(),
789 )),
790 Millisecond,
791 ),
792 (
793 Box::new(ToTimestampMicrosFunc::new_with_config(
794 &ConfigOptions::default(),
795 )),
796 Microsecond,
797 ),
798 (
799 Box::new(ToTimestampNanosFunc::new_with_config(
800 &ConfigOptions::default(),
801 )),
802 Nanosecond,
803 ),
804 ];
805 udfs
806 }
807
808 fn validate_expected_error(
809 options: &mut ConfigOptions,
810 args: ScalarFunctionArgs,
811 expected_err: &str,
812 ) {
813 let udfs = udfs_and_timeunit();
814
815 for (udf, _) in udfs {
816 match udf
817 .with_updated_config(options)
818 .unwrap()
819 .invoke_with_args(args.clone())
820 {
821 Ok(_) => panic!("Expected error but got success"),
822 Err(e) => {
823 assert!(
824 e.to_string().contains(expected_err),
825 "Can not find expected error '{expected_err}'. Actual error '{e}'"
826 );
827 }
828 }
829 }
830 }
831
832 #[test]
833 fn to_timestamp_arrays_and_nulls() -> Result<()> {
834 let mut string_builder = StringBuilder::with_capacity(2, 1024);
837 let mut ts_builder = TimestampNanosecondArray::builder(2);
838
839 string_builder.append_value("2020-09-08T13:42:29.190855");
840 ts_builder.append_value(1599572549190855000);
841
842 string_builder.append_null();
843 ts_builder.append_null();
844 let expected_timestamps = &ts_builder.finish() as &dyn Array;
845
846 let string_array =
847 ColumnarValue::Array(Arc::new(string_builder.finish()) as ArrayRef);
848 let parsed_timestamps = to_timestamp(&[string_array])
849 .expect("that to_timestamp parsed values without error");
850 if let ColumnarValue::Array(parsed_array) = parsed_timestamps {
851 assert_eq!(parsed_array.len(), 2);
852 assert_eq!(expected_timestamps, parsed_array.as_ref());
853 } else {
854 panic!("Expected a columnar array")
855 }
856 Ok(())
857 }
858
859 #[test]
860 fn to_timestamp_with_formats_arrays_and_nulls() -> Result<()> {
861 let mut date_string_builder = StringBuilder::with_capacity(2, 1024);
864 let mut format1_builder = StringBuilder::with_capacity(2, 1024);
865 let mut format2_builder = StringBuilder::with_capacity(2, 1024);
866 let mut format3_builder = StringBuilder::with_capacity(2, 1024);
867 let mut ts_builder = TimestampNanosecondArray::builder(2);
868
869 date_string_builder.append_null();
870 format1_builder.append_null();
871 format2_builder.append_null();
872 format3_builder.append_null();
873 ts_builder.append_null();
874
875 date_string_builder.append_value("2020-09-08T13:42:29.19085Z");
876 format1_builder.append_value("%s");
877 format2_builder.append_value("%c");
878 format3_builder.append_value("%+");
879 ts_builder.append_value(1599572549190850000);
880
881 let expected_timestamps = &ts_builder.finish() as &dyn Array;
882
883 let string_array = [
884 ColumnarValue::Array(Arc::new(date_string_builder.finish()) as ArrayRef),
885 ColumnarValue::Array(Arc::new(format1_builder.finish()) as ArrayRef),
886 ColumnarValue::Array(Arc::new(format2_builder.finish()) as ArrayRef),
887 ColumnarValue::Array(Arc::new(format3_builder.finish()) as ArrayRef),
888 ];
889 let parsed_timestamps = to_timestamp(&string_array)
890 .expect("that to_timestamp with format args parsed values without error");
891 if let ColumnarValue::Array(parsed_array) = parsed_timestamps {
892 assert_eq!(parsed_array.len(), 2);
893 assert_eq!(expected_timestamps, parsed_array.as_ref());
894 } else {
895 panic!("Expected a columnar array")
896 }
897 Ok(())
898 }
899
900 #[test]
901 fn to_timestamp_respects_execution_timezone() -> Result<()> {
902 let udfs = udfs_and_timeunit();
903
904 let mut options = ConfigOptions::default();
905 options.execution.time_zone = Some("-05:00".to_string());
906
907 let time_zone: Option<Arc<str>> = options
908 .execution
909 .time_zone
910 .as_ref()
911 .map(|tz| Arc::from(tz.as_str()));
912
913 for (udf, time_unit) in udfs {
914 let field = Field::new("arg", Utf8, true).into();
915
916 let args = ScalarFunctionArgs {
917 args: vec![ColumnarValue::Scalar(ScalarValue::Utf8(Some(
918 "2020-09-08T13:42:29".to_string(),
919 )))],
920 arg_fields: vec![field],
921 number_rows: 1,
922 return_field: Field::new(
923 "f",
924 Timestamp(time_unit, Some("-05:00".into())),
925 true,
926 )
927 .into(),
928 config_options: Arc::new(options.clone()),
929 };
930
931 let result = udf
932 .with_updated_config(&options.clone())
933 .unwrap()
934 .invoke_with_args(args)?;
935 let result = match time_unit {
936 Second => {
937 let ColumnarValue::Scalar(ScalarValue::TimestampSecond(
938 Some(value),
939 tz,
940 )) = result
941 else {
942 panic!("expected scalar timestamp");
943 };
944
945 assert_eq!(tz, time_zone);
946
947 value
948 }
949 Millisecond => {
950 let ColumnarValue::Scalar(ScalarValue::TimestampMillisecond(
951 Some(value),
952 tz,
953 )) = result
954 else {
955 panic!("expected scalar timestamp");
956 };
957
958 assert_eq!(tz, time_zone);
959
960 value
961 }
962 Microsecond => {
963 let ColumnarValue::Scalar(ScalarValue::TimestampMicrosecond(
964 Some(value),
965 tz,
966 )) = result
967 else {
968 panic!("expected scalar timestamp");
969 };
970
971 assert_eq!(tz, time_zone);
972
973 value
974 }
975 Nanosecond => {
976 let ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(
977 Some(value),
978 tz,
979 )) = result
980 else {
981 panic!("expected scalar timestamp");
982 };
983
984 assert_eq!(tz, time_zone);
985
986 value
987 }
988 };
989
990 let scale = match time_unit {
991 Second => 1_000_000_000,
992 Millisecond => 1_000_000,
993 Microsecond => 1_000,
994 Nanosecond => 1,
995 };
996
997 let offset = FixedOffset::west_opt(5 * 3600).unwrap();
998 let result = Some(
999 DateTime::<Utc>::from_timestamp_nanos(result * scale)
1000 .with_timezone(&offset)
1001 .to_string(),
1002 );
1003
1004 assert_eq!(result, Some("2020-09-08 13:42:29 -05:00".to_string()));
1005 }
1006
1007 Ok(())
1008 }
1009
1010 #[test]
1011 fn to_timestamp_formats_respects_execution_timezone() -> Result<()> {
1012 let udfs = udfs_and_timeunit();
1013
1014 let mut options = ConfigOptions::default();
1015 options.execution.time_zone = Some("-05:00".to_string());
1016
1017 let time_zone: Option<Arc<str>> = options
1018 .execution
1019 .time_zone
1020 .as_ref()
1021 .map(|tz| Arc::from(tz.as_str()));
1022
1023 let expr_field = Field::new("arg", Utf8, true).into();
1024 let format_field: Arc<Field> = Field::new("fmt", Utf8, true).into();
1025
1026 for (udf, time_unit) in udfs {
1027 for (value, format, expected_str) in [
1028 (
1029 "2020-09-08 09:42:29 -05:00",
1030 "%Y-%m-%d %H:%M:%S %z",
1031 Some("2020-09-08 09:42:29 -05:00"),
1032 ),
1033 (
1034 "2020-09-08T13:42:29Z",
1035 "%+",
1036 Some("2020-09-08 08:42:29 -05:00"),
1037 ),
1038 (
1039 "2020-09-08 13:42:29 UTC",
1040 "%Y-%m-%d %H:%M:%S %Z",
1041 Some("2020-09-08 08:42:29 -05:00"),
1042 ),
1043 (
1044 "+0000 2024-01-01 12:00:00",
1045 "%z %Y-%m-%d %H:%M:%S",
1046 Some("2024-01-01 07:00:00 -05:00"),
1047 ),
1048 (
1049 "20200908134229+0100",
1050 "%Y%m%d%H%M%S%z",
1051 Some("2020-09-08 07:42:29 -05:00"),
1052 ),
1053 (
1054 "2020-09-08+0230 13:42",
1055 "%Y-%m-%d%z %H:%M",
1056 Some("2020-09-08 06:12:00 -05:00"),
1057 ),
1058 ] {
1059 let args = ScalarFunctionArgs {
1060 args: vec![
1061 ColumnarValue::Scalar(ScalarValue::Utf8(Some(value.to_string()))),
1062 ColumnarValue::Scalar(ScalarValue::Utf8(Some(
1063 format.to_string(),
1064 ))),
1065 ],
1066 arg_fields: vec![Arc::clone(&expr_field), Arc::clone(&format_field)],
1067 number_rows: 1,
1068 return_field: Field::new(
1069 "f",
1070 Timestamp(time_unit, Some("-05:00".into())),
1071 true,
1072 )
1073 .into(),
1074 config_options: Arc::new(options.clone()),
1075 };
1076 let result = udf
1077 .with_updated_config(&options.clone())
1078 .unwrap()
1079 .invoke_with_args(args)?;
1080 let result = match time_unit {
1081 Second => {
1082 let ColumnarValue::Scalar(ScalarValue::TimestampSecond(
1083 Some(value),
1084 tz,
1085 )) = result
1086 else {
1087 panic!("expected scalar timestamp");
1088 };
1089
1090 assert_eq!(tz, time_zone);
1091
1092 value
1093 }
1094 Millisecond => {
1095 let ColumnarValue::Scalar(ScalarValue::TimestampMillisecond(
1096 Some(value),
1097 tz,
1098 )) = result
1099 else {
1100 panic!("expected scalar timestamp");
1101 };
1102
1103 assert_eq!(tz, time_zone);
1104
1105 value
1106 }
1107 Microsecond => {
1108 let ColumnarValue::Scalar(ScalarValue::TimestampMicrosecond(
1109 Some(value),
1110 tz,
1111 )) = result
1112 else {
1113 panic!("expected scalar timestamp");
1114 };
1115
1116 assert_eq!(tz, time_zone);
1117
1118 value
1119 }
1120 Nanosecond => {
1121 let ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(
1122 Some(value),
1123 tz,
1124 )) = result
1125 else {
1126 panic!("expected scalar timestamp");
1127 };
1128
1129 assert_eq!(tz, time_zone);
1130
1131 value
1132 }
1133 };
1134
1135 let scale = match time_unit {
1136 Second => 1_000_000_000,
1137 Millisecond => 1_000_000,
1138 Microsecond => 1_000,
1139 Nanosecond => 1,
1140 };
1141 let offset = FixedOffset::west_opt(5 * 3600).unwrap();
1142 let result = Some(
1143 DateTime::<Utc>::from_timestamp_nanos(result * scale)
1144 .with_timezone(&offset)
1145 .to_string(),
1146 );
1147
1148 assert_eq!(result, expected_str.map(|s| s.to_string()));
1149 }
1150 }
1151
1152 Ok(())
1153 }
1154
1155 #[test]
1156 fn to_timestamp_invalid_execution_timezone_behavior() -> Result<()> {
1157 let field: Arc<Field> = Field::new("arg", Utf8, true).into();
1158 let return_field: Arc<Field> =
1159 Field::new("f", Timestamp(Nanosecond, None), true).into();
1160
1161 let mut options = ConfigOptions::default();
1162 options.execution.time_zone = Some("Invalid/Timezone".to_string());
1163
1164 let args = ScalarFunctionArgs {
1165 args: vec![ColumnarValue::Scalar(ScalarValue::Utf8(Some(
1166 "2020-09-08T13:42:29Z".to_string(),
1167 )))],
1168 arg_fields: vec![Arc::clone(&field)],
1169 number_rows: 1,
1170 return_field: Arc::clone(&return_field),
1171 config_options: Arc::new(options.clone()),
1172 };
1173
1174 let expected_err =
1175 "Invalid timezone \"Invalid/Timezone\": failed to parse timezone";
1176
1177 validate_expected_error(&mut options, args, expected_err);
1178
1179 Ok(())
1180 }
1181
1182 #[test]
1183 fn to_timestamp_formats_invalid_execution_timezone_behavior() -> Result<()> {
1184 let expr_field: Arc<Field> = Field::new("arg", Utf8, true).into();
1185 let format_field: Arc<Field> = Field::new("fmt", Utf8, true).into();
1186 let return_field: Arc<Field> =
1187 Field::new("f", Timestamp(Nanosecond, None), true).into();
1188
1189 let mut options = ConfigOptions::default();
1190 options.execution.time_zone = Some("Invalid/Timezone".to_string());
1191
1192 let expected_err =
1193 "Invalid timezone \"Invalid/Timezone\": failed to parse timezone";
1194
1195 let make_args = |value: &str, format: &str| ScalarFunctionArgs {
1196 args: vec![
1197 ColumnarValue::Scalar(ScalarValue::Utf8(Some(value.to_string()))),
1198 ColumnarValue::Scalar(ScalarValue::Utf8(Some(format.to_string()))),
1199 ],
1200 arg_fields: vec![Arc::clone(&expr_field), Arc::clone(&format_field)],
1201 number_rows: 1,
1202 return_field: Arc::clone(&return_field),
1203 config_options: Arc::new(options.clone()),
1204 };
1205
1206 for (value, format, _expected_str) in [
1207 (
1208 "2020-09-08 09:42:29 -05:00",
1209 "%Y-%m-%d %H:%M:%S %z",
1210 Some("2020-09-08 09:42:29 -05:00"),
1211 ),
1212 (
1213 "2020-09-08T13:42:29Z",
1214 "%+",
1215 Some("2020-09-08 08:42:29 -05:00"),
1216 ),
1217 (
1218 "2020-09-08 13:42:29 +0000",
1219 "%Y-%m-%d %H:%M:%S %z",
1220 Some("2020-09-08 08:42:29 -05:00"),
1221 ),
1222 (
1223 "+0000 2024-01-01 12:00:00",
1224 "%z %Y-%m-%d %H:%M:%S",
1225 Some("2024-01-01 07:00:00 -05:00"),
1226 ),
1227 (
1228 "20200908134229+0100",
1229 "%Y%m%d%H%M%S%z",
1230 Some("2020-09-08 07:42:29 -05:00"),
1231 ),
1232 (
1233 "2020-09-08+0230 13:42",
1234 "%Y-%m-%d%z %H:%M",
1235 Some("2020-09-08 06:12:00 -05:00"),
1236 ),
1237 ] {
1238 let args = make_args(value, format);
1239 validate_expected_error(&mut options.clone(), args, expected_err);
1240 }
1241
1242 let args = ScalarFunctionArgs {
1243 args: vec![
1244 ColumnarValue::Scalar(ScalarValue::Utf8(Some(
1245 "2020-09-08T13:42:29".to_string(),
1246 ))),
1247 ColumnarValue::Scalar(ScalarValue::Utf8(Some(
1248 "%Y-%m-%dT%H:%M:%S".to_string(),
1249 ))),
1250 ],
1251 arg_fields: vec![Arc::clone(&expr_field), Arc::clone(&format_field)],
1252 number_rows: 1,
1253 return_field: Arc::clone(&return_field),
1254 config_options: Arc::new(options.clone()),
1255 };
1256
1257 validate_expected_error(&mut options.clone(), args, expected_err);
1258
1259 Ok(())
1260 }
1261
1262 #[test]
1263 fn to_timestamp_invalid_input_type() -> Result<()> {
1264 let mut builder = Int64Array::builder(1);
1268 builder.append_value(1);
1269 let int64array = ColumnarValue::Array(Arc::new(builder.finish()));
1270
1271 let expected_err =
1272 "Execution error: Unsupported data type Int64 for function to_timestamp";
1273 match to_timestamp(&[int64array]) {
1274 Ok(_) => panic!("Expected error but got success"),
1275 Err(e) => {
1276 assert!(
1277 e.to_string().contains(expected_err),
1278 "Can not find expected error '{expected_err}'. Actual error '{e}'"
1279 );
1280 }
1281 }
1282 Ok(())
1283 }
1284
1285 #[test]
1286 fn to_timestamp_with_formats_invalid_input_type() -> Result<()> {
1287 let mut builder = Int64Array::builder(1);
1291 builder.append_value(1);
1292 let int64array = [
1293 ColumnarValue::Array(Arc::new(builder.finish())),
1294 ColumnarValue::Array(Arc::new(builder.finish())),
1295 ];
1296
1297 let expected_err =
1298 "Execution error: Unsupported data type Int64 for function to_timestamp";
1299 match to_timestamp(&int64array) {
1300 Ok(_) => panic!("Expected error but got success"),
1301 Err(e) => {
1302 assert!(
1303 e.to_string().contains(expected_err),
1304 "Can not find expected error '{expected_err}'. Actual error '{e}'"
1305 );
1306 }
1307 }
1308 Ok(())
1309 }
1310
1311 #[test]
1312 fn to_timestamp_with_unparsable_data() -> Result<()> {
1313 let mut date_string_builder = StringBuilder::with_capacity(2, 1024);
1314
1315 date_string_builder.append_null();
1316
1317 date_string_builder.append_value("2020-09-08 - 13:42:29.19085Z");
1318
1319 let string_array =
1320 ColumnarValue::Array(Arc::new(date_string_builder.finish()) as ArrayRef);
1321
1322 let expected_err = "Arrow error: Parser error: Error parsing timestamp from '2020-09-08 - 13:42:29.19085Z': error parsing time";
1323 match to_timestamp(&[string_array]) {
1324 Ok(_) => panic!("Expected error but got success"),
1325 Err(e) => {
1326 assert!(
1327 e.to_string().contains(expected_err),
1328 "Can not find expected error '{expected_err}'. Actual error '{e}'"
1329 );
1330 }
1331 }
1332 Ok(())
1333 }
1334
1335 #[test]
1336 fn to_timestamp_with_invalid_tz() -> Result<()> {
1337 let mut date_string_builder = StringBuilder::with_capacity(2, 1024);
1338
1339 date_string_builder.append_null();
1340
1341 date_string_builder.append_value("2020-09-08T13:42:29ZZ");
1342
1343 let string_array =
1344 ColumnarValue::Array(Arc::new(date_string_builder.finish()) as ArrayRef);
1345
1346 let expected_err = "Arrow error: Parser error: Invalid timezone \"ZZ\": failed to parse timezone";
1347 match to_timestamp(&[string_array]) {
1348 Ok(_) => panic!("Expected error but got success"),
1349 Err(e) => {
1350 assert!(
1351 e.to_string().contains(expected_err),
1352 "Can not find expected error '{expected_err}'. Actual error '{e}'"
1353 );
1354 }
1355 }
1356 Ok(())
1357 }
1358
1359 #[test]
1360 fn to_timestamp_with_no_matching_formats() -> Result<()> {
1361 let mut date_string_builder = StringBuilder::with_capacity(2, 1024);
1362 let mut format1_builder = StringBuilder::with_capacity(2, 1024);
1363 let mut format2_builder = StringBuilder::with_capacity(2, 1024);
1364 let mut format3_builder = StringBuilder::with_capacity(2, 1024);
1365
1366 date_string_builder.append_null();
1367 format1_builder.append_null();
1368 format2_builder.append_null();
1369 format3_builder.append_null();
1370
1371 date_string_builder.append_value("2020-09-08T13:42:29.19085Z");
1372 format1_builder.append_value("%s");
1373 format2_builder.append_value("%c");
1374 format3_builder.append_value("%H:%M:%S");
1375
1376 let string_array = [
1377 ColumnarValue::Array(Arc::new(date_string_builder.finish()) as ArrayRef),
1378 ColumnarValue::Array(Arc::new(format1_builder.finish()) as ArrayRef),
1379 ColumnarValue::Array(Arc::new(format2_builder.finish()) as ArrayRef),
1380 ColumnarValue::Array(Arc::new(format3_builder.finish()) as ArrayRef),
1381 ];
1382
1383 let expected_err = "Execution error: Error parsing timestamp from '2020-09-08T13:42:29.19085Z' using format '%H:%M:%S': input contains invalid characters";
1384 match to_timestamp(&string_array) {
1385 Ok(_) => panic!("Expected error but got success"),
1386 Err(e) => {
1387 assert!(
1388 e.to_string().contains(expected_err),
1389 "Can not find expected error '{expected_err}'. Actual error '{e}'"
1390 );
1391 }
1392 }
1393 Ok(())
1394 }
1395
1396 #[test]
1397 fn string_to_timestamp_formatted() {
1398 assert_eq!(
1400 1599572549190855000,
1401 parse_timestamp_formatted("2020-09-08T13:42:29.190855+00:00", "%+").unwrap()
1402 );
1403 assert_eq!(
1404 1599572549190855000,
1405 parse_timestamp_formatted("2020-09-08T13:42:29.190855Z", "%+").unwrap()
1406 );
1407 assert_eq!(
1408 1599572549000000000,
1409 parse_timestamp_formatted("2020-09-08T13:42:29Z", "%+").unwrap()
1410 ); assert_eq!(
1412 1599590549190855000,
1413 parse_timestamp_formatted("2020-09-08T13:42:29.190855-05:00", "%+").unwrap()
1414 );
1415 assert_eq!(
1416 1599590549000000000,
1417 parse_timestamp_formatted("1599590549", "%s").unwrap()
1418 );
1419 assert_eq!(
1420 1599572549000000000,
1421 parse_timestamp_formatted("09-08-2020 13/42/29", "%m-%d-%Y %H/%M/%S")
1422 .unwrap()
1423 );
1424 assert_eq!(
1425 1642896000000000000,
1426 parse_timestamp_formatted("2022-01-23", "%Y-%m-%d").unwrap()
1427 );
1428 }
1429
1430 fn parse_timestamp_formatted(s: &str, format: &str) -> Result<i64, DataFusionError> {
1431 let result = string_to_timestamp_nanos_formatted_with_timezone(
1432 &Some("UTC".parse()?),
1433 s,
1434 format,
1435 );
1436 if let Err(e) = &result {
1437 eprintln!("Error parsing timestamp '{s}' using format '{format}': {e:?}");
1438 }
1439 result
1440 }
1441
1442 #[test]
1443 fn string_to_timestamp_formatted_invalid() {
1444 let cases = [
1446 ("", "%Y%m%d %H%M%S", "premature end of input"),
1447 ("SS", "%c", "premature end of input"),
1448 ("Wed, 18 Feb 2015 23:16:09 GMT", "", "trailing input"),
1449 (
1450 "Wed, 18 Feb 2015 23:16:09 GMT",
1451 "%XX",
1452 "input contains invalid characters",
1453 ),
1454 (
1455 "Wed, 18 Feb 2015 23:16:09 GMT",
1456 "%Y%m%d %H%M%S",
1457 "input contains invalid characters",
1458 ),
1459 ];
1460
1461 for (s, f, ctx) in cases {
1462 let expected = format!(
1463 "Execution error: Error parsing timestamp from '{s}' using format '{f}': {ctx}"
1464 );
1465 let actual = string_to_datetime_formatted(&Utc, s, f)
1466 .unwrap_err()
1467 .strip_backtrace();
1468 assert_eq!(actual, expected)
1469 }
1470 }
1471
1472 #[test]
1473 fn string_to_timestamp_invalid_arguments() {
1474 let cases = [
1476 ("", "%Y%m%d %H%M%S", "premature end of input"),
1477 ("SS", "%c", "premature end of input"),
1478 ("Wed, 18 Feb 2015 23:16:09 GMT", "", "trailing input"),
1479 (
1480 "Wed, 18 Feb 2015 23:16:09 GMT",
1481 "%XX",
1482 "input contains invalid characters",
1483 ),
1484 (
1485 "Wed, 18 Feb 2015 23:16:09 GMT",
1486 "%Y%m%d %H%M%S",
1487 "input contains invalid characters",
1488 ),
1489 ];
1490
1491 for (s, f, ctx) in cases {
1492 let expected = format!(
1493 "Execution error: Error parsing timestamp from '{s}' using format '{f}': {ctx}"
1494 );
1495 let actual = string_to_datetime_formatted(&Utc, s, f)
1496 .unwrap_err()
1497 .strip_backtrace();
1498 assert_eq!(actual, expected)
1499 }
1500 }
1501
1502 #[test]
1503 fn test_no_tz() {
1504 let udfs: Vec<Box<dyn ScalarUDFImpl>> = vec![
1505 Box::new(ToTimestampFunc::new_with_config(&ConfigOptions::default())),
1506 Box::new(ToTimestampSecondsFunc::new_with_config(
1507 &ConfigOptions::default(),
1508 )),
1509 Box::new(ToTimestampMillisFunc::new_with_config(
1510 &ConfigOptions::default(),
1511 )),
1512 Box::new(ToTimestampNanosFunc::new_with_config(
1513 &ConfigOptions::default(),
1514 )),
1515 Box::new(ToTimestampSecondsFunc::new_with_config(
1516 &ConfigOptions::default(),
1517 )),
1518 ];
1519
1520 let mut nanos_builder = TimestampNanosecondArray::builder(2);
1521 let mut millis_builder = TimestampMillisecondArray::builder(2);
1522 let mut micros_builder = TimestampMicrosecondArray::builder(2);
1523 let mut sec_builder = TimestampSecondArray::builder(2);
1524
1525 nanos_builder.append_value(1599572549190850000);
1526 millis_builder.append_value(1599572549190);
1527 micros_builder.append_value(1599572549190850);
1528 sec_builder.append_value(1599572549);
1529
1530 let nanos_timestamps =
1531 Arc::new(nanos_builder.finish().with_timezone("UTC")) as ArrayRef;
1532 let millis_timestamps =
1533 Arc::new(millis_builder.finish().with_timezone("UTC")) as ArrayRef;
1534 let micros_timestamps =
1535 Arc::new(micros_builder.finish().with_timezone("UTC")) as ArrayRef;
1536 let sec_timestamps =
1537 Arc::new(sec_builder.finish().with_timezone("UTC")) as ArrayRef;
1538
1539 let arrays = &[
1540 ColumnarValue::Array(Arc::clone(&nanos_timestamps)),
1541 ColumnarValue::Array(Arc::clone(&millis_timestamps)),
1542 ColumnarValue::Array(Arc::clone(µs_timestamps)),
1543 ColumnarValue::Array(Arc::clone(&sec_timestamps)),
1544 ];
1545
1546 for udf in &udfs {
1547 for array in arrays {
1548 let rt = udf.return_type(&[array.data_type()]).unwrap();
1549 let arg_field = Field::new("arg", array.data_type().clone(), true).into();
1550 assert!(matches!(rt, Timestamp(_, None)));
1551 let args = ScalarFunctionArgs {
1552 args: vec![array.clone()],
1553 arg_fields: vec![arg_field],
1554 number_rows: 4,
1555 return_field: Field::new("f", rt, true).into(),
1556 config_options: Arc::new(ConfigOptions::default()),
1557 };
1558 let res = udf
1559 .invoke_with_args(args)
1560 .expect("that to_timestamp parsed values without error");
1561 let array = match res {
1562 ColumnarValue::Array(res) => res,
1563 _ => panic!("Expected a columnar array"),
1564 };
1565 let ty = array.data_type();
1566 assert!(matches!(ty, Timestamp(_, None)));
1567 }
1568 }
1569
1570 let mut nanos_builder = TimestampNanosecondArray::builder(2);
1571 let mut millis_builder = TimestampMillisecondArray::builder(2);
1572 let mut micros_builder = TimestampMicrosecondArray::builder(2);
1573 let mut sec_builder = TimestampSecondArray::builder(2);
1574 let mut i64_builder = Int64Array::builder(2);
1575
1576 nanos_builder.append_value(1599572549190850000);
1577 millis_builder.append_value(1599572549190);
1578 micros_builder.append_value(1599572549190850);
1579 sec_builder.append_value(1599572549);
1580 i64_builder.append_value(1599572549);
1581
1582 let nanos_timestamps = Arc::new(nanos_builder.finish()) as ArrayRef;
1583 let millis_timestamps = Arc::new(millis_builder.finish()) as ArrayRef;
1584 let micros_timestamps = Arc::new(micros_builder.finish()) as ArrayRef;
1585 let sec_timestamps = Arc::new(sec_builder.finish()) as ArrayRef;
1586 let i64_timestamps = Arc::new(i64_builder.finish()) as ArrayRef;
1587
1588 let arrays = &[
1589 ColumnarValue::Array(Arc::clone(&nanos_timestamps)),
1590 ColumnarValue::Array(Arc::clone(&millis_timestamps)),
1591 ColumnarValue::Array(Arc::clone(µs_timestamps)),
1592 ColumnarValue::Array(Arc::clone(&sec_timestamps)),
1593 ColumnarValue::Array(Arc::clone(&i64_timestamps)),
1594 ];
1595
1596 for udf in &udfs {
1597 for array in arrays {
1598 let rt = udf.return_type(&[array.data_type()]).unwrap();
1599 assert!(matches!(rt, Timestamp(_, None)));
1600 let arg_field = Field::new("arg", array.data_type().clone(), true).into();
1601 let args = ScalarFunctionArgs {
1602 args: vec![array.clone()],
1603 arg_fields: vec![arg_field],
1604 number_rows: 5,
1605 return_field: Field::new("f", rt, true).into(),
1606 config_options: Arc::new(ConfigOptions::default()),
1607 };
1608 let res = udf
1609 .invoke_with_args(args)
1610 .expect("that to_timestamp parsed values without error");
1611 let array = match res {
1612 ColumnarValue::Array(res) => res,
1613 _ => panic!("Expected a columnar array"),
1614 };
1615 let ty = array.data_type();
1616 assert!(matches!(ty, Timestamp(_, None)));
1617 }
1618 }
1619 }
1620
1621 #[test]
1622 fn test_to_timestamp_arg_validation() {
1623 let mut date_string_builder = StringBuilder::with_capacity(2, 1024);
1624 date_string_builder.append_value("2020-09-08T13:42:29.19085Z");
1625
1626 let data = date_string_builder.finish();
1627
1628 let funcs: Vec<(ScalarFunctionImplementation, TimeUnit)> = vec![
1629 (Arc::new(to_timestamp), Nanosecond),
1630 (Arc::new(to_timestamp_micros), Microsecond),
1631 (Arc::new(to_timestamp_millis), Millisecond),
1632 (Arc::new(to_timestamp_nanos), Nanosecond),
1633 (Arc::new(to_timestamp_seconds), Second),
1634 ];
1635
1636 let mut nanos_builder = TimestampNanosecondArray::builder(2);
1637 let mut millis_builder = TimestampMillisecondArray::builder(2);
1638 let mut micros_builder = TimestampMicrosecondArray::builder(2);
1639 let mut sec_builder = TimestampSecondArray::builder(2);
1640
1641 nanos_builder.append_value(1599572549190850000);
1642 millis_builder.append_value(1599572549190);
1643 micros_builder.append_value(1599572549190850);
1644 sec_builder.append_value(1599572549);
1645
1646 let nanos_expected_timestamps = &nanos_builder.finish() as &dyn Array;
1647 let millis_expected_timestamps = &millis_builder.finish() as &dyn Array;
1648 let micros_expected_timestamps = µs_builder.finish() as &dyn Array;
1649 let sec_expected_timestamps = &sec_builder.finish() as &dyn Array;
1650
1651 for (func, time_unit) in funcs {
1652 let string_array = [
1654 ColumnarValue::Array(Arc::new(data.clone()) as ArrayRef),
1655 ColumnarValue::Scalar(ScalarValue::Utf8(Some("%s".to_string()))),
1656 ColumnarValue::Scalar(ScalarValue::Utf8(Some("%c".to_string()))),
1657 ColumnarValue::Scalar(ScalarValue::Utf8(Some("%+".to_string()))),
1658 ];
1659 let parsed_timestamps = func(&string_array)
1660 .expect("that to_timestamp with format args parsed values without error");
1661 if let ColumnarValue::Array(parsed_array) = parsed_timestamps {
1662 assert_eq!(parsed_array.len(), 1);
1663 match time_unit {
1664 Nanosecond => {
1665 assert_eq!(nanos_expected_timestamps, parsed_array.as_ref())
1666 }
1667 Millisecond => {
1668 assert_eq!(millis_expected_timestamps, parsed_array.as_ref())
1669 }
1670 Microsecond => {
1671 assert_eq!(micros_expected_timestamps, parsed_array.as_ref())
1672 }
1673 Second => {
1674 assert_eq!(sec_expected_timestamps, parsed_array.as_ref())
1675 }
1676 };
1677 } else {
1678 panic!("Expected a columnar array")
1679 }
1680
1681 let string_array = [
1683 ColumnarValue::Array(Arc::new(data.clone()) as ArrayRef),
1684 ColumnarValue::Scalar(ScalarValue::LargeUtf8(Some("%s".to_string()))),
1685 ColumnarValue::Scalar(ScalarValue::LargeUtf8(Some("%c".to_string()))),
1686 ColumnarValue::Scalar(ScalarValue::LargeUtf8(Some("%+".to_string()))),
1687 ];
1688 let parsed_timestamps = func(&string_array)
1689 .expect("that to_timestamp with format args parsed values without error");
1690 if let ColumnarValue::Array(parsed_array) = parsed_timestamps {
1691 assert_eq!(parsed_array.len(), 1);
1692 assert!(matches!(parsed_array.data_type(), Timestamp(_, None)));
1693
1694 match time_unit {
1695 Nanosecond => {
1696 assert_eq!(nanos_expected_timestamps, parsed_array.as_ref())
1697 }
1698 Millisecond => {
1699 assert_eq!(millis_expected_timestamps, parsed_array.as_ref())
1700 }
1701 Microsecond => {
1702 assert_eq!(micros_expected_timestamps, parsed_array.as_ref())
1703 }
1704 Second => {
1705 assert_eq!(sec_expected_timestamps, parsed_array.as_ref())
1706 }
1707 };
1708 } else {
1709 panic!("Expected a columnar array")
1710 }
1711
1712 let string_array = [
1714 ColumnarValue::Array(Arc::new(data.clone()) as ArrayRef),
1715 ColumnarValue::Scalar(ScalarValue::Int32(Some(1))),
1716 ColumnarValue::Scalar(ScalarValue::Int32(Some(2))),
1717 ColumnarValue::Scalar(ScalarValue::Int32(Some(3))),
1718 ];
1719
1720 let expected = "Unsupported data type Int32 for function".to_string();
1721 let actual = func(&string_array).unwrap_err().to_string();
1722 assert_contains!(actual, expected);
1723
1724 let string_array = [
1726 ColumnarValue::Array(Arc::new(data.clone()) as ArrayRef),
1727 ColumnarValue::Array(Arc::new(PrimitiveArray::<Int64Type>::new(
1728 vec![1i64].into(),
1729 None,
1730 )) as ArrayRef),
1731 ];
1732
1733 let expected = "Unsupported data type".to_string();
1734 let actual = func(&string_array).unwrap_err().to_string();
1735 assert_contains!(actual, expected);
1736 }
1737 }
1738}