1use std::any::Any;
19use std::sync::Arc;
20
21use crate::datetime::common::*;
22use arrow::array::Float64Array;
23use arrow::datatypes::DataType::*;
24use arrow::datatypes::TimeUnit::{Microsecond, Millisecond, Nanosecond, Second};
25use arrow::datatypes::{
26 ArrowTimestampType, DataType, TimeUnit, TimestampMicrosecondType,
27 TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType,
28};
29use datafusion_common::format::DEFAULT_CAST_OPTIONS;
30use datafusion_common::{exec_err, Result, ScalarType, ScalarValue};
31use datafusion_expr::{
32 ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
33};
34use datafusion_macros::user_doc;
35
36#[user_doc(
37 doc_section(label = "Time and Date Functions"),
38 description = r#"
Converts a value to a timestamp (`YYYY-MM-DDT00:00:00Z`). Supports strings, integer, unsigned integer, and double types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono formats](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) are provided. Integers, unsigned integers, and doubles are interpreted as seconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp.
40
41Note: `to_timestamp` returns `Timestamp(Nanosecond)`. The supported range for integer input is between `-9223372037` and `9223372036`. Supported range for string input is between `1677-09-21T00:12:44.0` and `2262-04-11T23:47:16.0`. Please use `to_timestamp_seconds` for the input outside of supported bounds.
42"#,
43 syntax_example = "to_timestamp(expression[, ..., format_n])",
44 sql_example = r#"```sql
45> select to_timestamp('2023-01-31T09:26:56.123456789-05:00');
46+-----------------------------------------------------------+
47| to_timestamp(Utf8("2023-01-31T09:26:56.123456789-05:00")) |
48+-----------------------------------------------------------+
49| 2023-01-31T14:26:56.123456789 |
50+-----------------------------------------------------------+
51> select to_timestamp('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y');
52+--------------------------------------------------------------------------------------------------------+
53| to_timestamp(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) |
54+--------------------------------------------------------------------------------------------------------+
55| 2023-05-17T03:59:00.123456789 |
56+--------------------------------------------------------------------------------------------------------+
57```
58Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs)
59"#,
60 argument(
61 name = "expression",
62 description = "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators."
63 ),
64 argument(
65 name = "format_n",
66 description = "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned."
67 )
68)]
/// Scalar UDF registered under the name `to_timestamp`; its
/// `ScalarUDFImpl` produces `Timestamp(Nanosecond)` values.
#[derive(Debug)]
pub struct ToTimestampFunc {
    /// Argument signature reported to the planner; `new` sets it to
    /// variadic-any with immutable volatility.
    signature: Signature,
}
73
74#[user_doc(
75 doc_section(label = "Time and Date Functions"),
    description = "Converts a value to a timestamp (`YYYY-MM-DDT00:00:00Z`). Supports strings, integer, and unsigned integer types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono formats](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) are provided. Integers and unsigned integers are interpreted as seconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp.",
77 syntax_example = "to_timestamp_seconds(expression[, ..., format_n])",
78 sql_example = r#"```sql
79> select to_timestamp_seconds('2023-01-31T09:26:56.123456789-05:00');
80+-------------------------------------------------------------------+
81| to_timestamp_seconds(Utf8("2023-01-31T09:26:56.123456789-05:00")) |
82+-------------------------------------------------------------------+
83| 2023-01-31T14:26:56 |
84+-------------------------------------------------------------------+
85> select to_timestamp_seconds('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y');
86+----------------------------------------------------------------------------------------------------------------+
87| to_timestamp_seconds(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) |
88+----------------------------------------------------------------------------------------------------------------+
89| 2023-05-17T03:59:00 |
90+----------------------------------------------------------------------------------------------------------------+
91```
92Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs)
93"#,
94 argument(
95 name = "expression",
96 description = "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators."
97 ),
98 argument(
99 name = "format_n",
100 description = "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned."
101 )
102)]
/// Scalar UDF registered under the name `to_timestamp_seconds`; its
/// `ScalarUDFImpl` produces `Timestamp(Second)` values.
#[derive(Debug)]
pub struct ToTimestampSecondsFunc {
    /// Argument signature reported to the planner; `new` sets it to
    /// variadic-any with immutable volatility.
    signature: Signature,
}
107
108#[user_doc(
109 doc_section(label = "Time and Date Functions"),
110 description = "Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000Z`). Supports strings, integer, and unsigned integer types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono formats](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) are provided. Integers and unsigned integers are interpreted as milliseconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp.",
111 syntax_example = "to_timestamp_millis(expression[, ..., format_n])",
112 sql_example = r#"```sql
113> select to_timestamp_millis('2023-01-31T09:26:56.123456789-05:00');
114+------------------------------------------------------------------+
115| to_timestamp_millis(Utf8("2023-01-31T09:26:56.123456789-05:00")) |
116+------------------------------------------------------------------+
117| 2023-01-31T14:26:56.123 |
118+------------------------------------------------------------------+
119> select to_timestamp_millis('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y');
120+---------------------------------------------------------------------------------------------------------------+
121| to_timestamp_millis(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) |
122+---------------------------------------------------------------------------------------------------------------+
123| 2023-05-17T03:59:00.123 |
124+---------------------------------------------------------------------------------------------------------------+
125```
126Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs)
127"#,
128 argument(
129 name = "expression",
130 description = "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators."
131 ),
132 argument(
133 name = "format_n",
134 description = "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned."
135 )
136)]
/// Scalar UDF registered under the name `to_timestamp_millis`; its
/// `ScalarUDFImpl` produces `Timestamp(Millisecond)` values.
#[derive(Debug)]
pub struct ToTimestampMillisFunc {
    /// Argument signature reported to the planner; `new` sets it to
    /// variadic-any with immutable volatility.
    signature: Signature,
}
141
142#[user_doc(
143 doc_section(label = "Time and Date Functions"),
    description = "Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000000Z`). Supports strings, integer, and unsigned integer types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono formats](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) are provided. Integers and unsigned integers are interpreted as microseconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp.",
145 syntax_example = "to_timestamp_micros(expression[, ..., format_n])",
146 sql_example = r#"```sql
147> select to_timestamp_micros('2023-01-31T09:26:56.123456789-05:00');
148+------------------------------------------------------------------+
149| to_timestamp_micros(Utf8("2023-01-31T09:26:56.123456789-05:00")) |
150+------------------------------------------------------------------+
151| 2023-01-31T14:26:56.123456 |
152+------------------------------------------------------------------+
153> select to_timestamp_micros('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y');
154+---------------------------------------------------------------------------------------------------------------+
155| to_timestamp_micros(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) |
156+---------------------------------------------------------------------------------------------------------------+
157| 2023-05-17T03:59:00.123456 |
158+---------------------------------------------------------------------------------------------------------------+
159```
160Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs)
161"#,
162 argument(
163 name = "expression",
164 description = "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators."
165 ),
166 argument(
167 name = "format_n",
168 description = "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned."
169 )
170)]
/// Scalar UDF registered under the name `to_timestamp_micros`; its
/// `ScalarUDFImpl` produces `Timestamp(Microsecond)` values.
#[derive(Debug)]
pub struct ToTimestampMicrosFunc {
    /// Argument signature reported to the planner; `new` sets it to
    /// variadic-any with immutable volatility.
    signature: Signature,
}
175
176#[user_doc(
177 doc_section(label = "Time and Date Functions"),
    description = "Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000000000Z`). Supports strings, integer, and unsigned integer types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono formats](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) are provided. Integers and unsigned integers are interpreted as nanoseconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp.",
179 syntax_example = "to_timestamp_nanos(expression[, ..., format_n])",
180 sql_example = r#"```sql
181> select to_timestamp_nanos('2023-01-31T09:26:56.123456789-05:00');
182+-----------------------------------------------------------------+
183| to_timestamp_nanos(Utf8("2023-01-31T09:26:56.123456789-05:00")) |
184+-----------------------------------------------------------------+
185| 2023-01-31T14:26:56.123456789 |
186+-----------------------------------------------------------------+
187> select to_timestamp_nanos('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y');
188+--------------------------------------------------------------------------------------------------------------+
189| to_timestamp_nanos(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) |
190+--------------------------------------------------------------------------------------------------------------+
191| 2023-05-17T03:59:00.123456789 |
+--------------------------------------------------------------------------------------------------------------+
193```
194Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs)
195"#,
196 argument(
197 name = "expression",
198 description = "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators."
199 ),
200 argument(
201 name = "format_n",
202 description = "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned."
203 )
204)]
/// Scalar UDF registered under the name `to_timestamp_nanos`; its
/// `ScalarUDFImpl` produces `Timestamp(Nanosecond)` values.
#[derive(Debug)]
pub struct ToTimestampNanosFunc {
    /// Argument signature reported to the planner; `new` sets it to
    /// variadic-any with immutable volatility.
    signature: Signature,
}
209
210impl Default for ToTimestampFunc {
211 fn default() -> Self {
212 Self::new()
213 }
214}
215
216impl ToTimestampFunc {
217 pub fn new() -> Self {
218 Self {
219 signature: Signature::variadic_any(Volatility::Immutable),
220 }
221 }
222}
223
224impl Default for ToTimestampSecondsFunc {
225 fn default() -> Self {
226 Self::new()
227 }
228}
229
230impl ToTimestampSecondsFunc {
231 pub fn new() -> Self {
232 Self {
233 signature: Signature::variadic_any(Volatility::Immutable),
234 }
235 }
236}
237
238impl Default for ToTimestampMillisFunc {
239 fn default() -> Self {
240 Self::new()
241 }
242}
243
244impl ToTimestampMillisFunc {
245 pub fn new() -> Self {
246 Self {
247 signature: Signature::variadic_any(Volatility::Immutable),
248 }
249 }
250}
251
252impl Default for ToTimestampMicrosFunc {
253 fn default() -> Self {
254 Self::new()
255 }
256}
257
258impl ToTimestampMicrosFunc {
259 pub fn new() -> Self {
260 Self {
261 signature: Signature::variadic_any(Volatility::Immutable),
262 }
263 }
264}
265
266impl Default for ToTimestampNanosFunc {
267 fn default() -> Self {
268 Self::new()
269 }
270}
271
272impl ToTimestampNanosFunc {
273 pub fn new() -> Self {
274 Self {
275 signature: Signature::variadic_any(Volatility::Immutable),
276 }
277 }
278}
279
280impl ScalarUDFImpl for ToTimestampFunc {
287 fn as_any(&self) -> &dyn Any {
288 self
289 }
290
291 fn name(&self) -> &str {
292 "to_timestamp"
293 }
294
295 fn signature(&self) -> &Signature {
296 &self.signature
297 }
298
299 fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
300 Ok(return_type_for(&arg_types[0], Nanosecond))
301 }
302
303 fn invoke_with_args(
304 &self,
305 args: datafusion_expr::ScalarFunctionArgs,
306 ) -> Result<ColumnarValue> {
307 let args = args.args;
308 if args.is_empty() {
309 return exec_err!(
310 "to_timestamp function requires 1 or more arguments, got {}",
311 args.len()
312 );
313 }
314
315 if args.len() > 1 {
317 validate_data_types(&args, "to_timestamp")?;
318 }
319
320 match args[0].data_type() {
321 Int32 | Int64 => args[0]
322 .cast_to(&Timestamp(Second, None), None)?
323 .cast_to(&Timestamp(Nanosecond, None), None),
324 Null | Timestamp(_, None) => {
325 args[0].cast_to(&Timestamp(Nanosecond, None), None)
326 }
327 Float64 => {
328 let rescaled = arrow::compute::kernels::numeric::mul(
329 &args[0].to_array(1)?,
330 &arrow::array::Scalar::new(Float64Array::from(vec![
331 1_000_000_000f64,
332 ])),
333 )?;
334 Ok(ColumnarValue::Array(arrow::compute::cast_with_options(
335 &rescaled,
336 &Timestamp(Nanosecond, None),
337 &DEFAULT_CAST_OPTIONS,
338 )?))
339 }
340 Timestamp(_, Some(tz)) => {
341 args[0].cast_to(&Timestamp(Nanosecond, Some(tz)), None)
342 }
343 Utf8View | LargeUtf8 | Utf8 => {
344 to_timestamp_impl::<TimestampNanosecondType>(&args, "to_timestamp")
345 }
346 Decimal128(_, _) => {
347 match &args[0] {
348 ColumnarValue::Scalar(ScalarValue::Decimal128(
349 Some(value),
350 _,
351 scale,
352 )) => {
353 let scale_factor = 10_i128.pow(*scale as u32);
355 let seconds = value / scale_factor;
356 let fraction = value % scale_factor;
357
358 let nanos = (fraction * 1_000_000_000) / scale_factor;
359
360 let timestamp_nanos = seconds * 1_000_000_000 + nanos;
361
362 Ok(ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(
363 Some(timestamp_nanos as i64),
364 None,
365 )))
366 }
367 _ => exec_err!("Invalid decimal value"),
368 }
369 }
370 other => {
371 exec_err!(
372 "Unsupported data type {:?} for function to_timestamp",
373 other
374 )
375 }
376 }
377 }
378 fn documentation(&self) -> Option<&Documentation> {
379 self.doc()
380 }
381}
382
383impl ScalarUDFImpl for ToTimestampSecondsFunc {
384 fn as_any(&self) -> &dyn Any {
385 self
386 }
387
388 fn name(&self) -> &str {
389 "to_timestamp_seconds"
390 }
391
392 fn signature(&self) -> &Signature {
393 &self.signature
394 }
395
396 fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
397 Ok(return_type_for(&arg_types[0], Second))
398 }
399
400 fn invoke_with_args(
401 &self,
402 args: datafusion_expr::ScalarFunctionArgs,
403 ) -> Result<ColumnarValue> {
404 let args = args.args;
405 if args.is_empty() {
406 return exec_err!(
407 "to_timestamp_seconds function requires 1 or more arguments, got {}",
408 args.len()
409 );
410 }
411
412 if args.len() > 1 {
414 validate_data_types(&args, "to_timestamp")?;
415 }
416
417 match args[0].data_type() {
418 Null | Int32 | Int64 | Timestamp(_, None) | Decimal128(_, _) => {
419 args[0].cast_to(&Timestamp(Second, None), None)
420 }
421 Timestamp(_, Some(tz)) => args[0].cast_to(&Timestamp(Second, Some(tz)), None),
422 Utf8View | LargeUtf8 | Utf8 => {
423 to_timestamp_impl::<TimestampSecondType>(&args, "to_timestamp_seconds")
424 }
425 other => {
426 exec_err!(
427 "Unsupported data type {:?} for function to_timestamp_seconds",
428 other
429 )
430 }
431 }
432 }
433 fn documentation(&self) -> Option<&Documentation> {
434 self.doc()
435 }
436}
437
438impl ScalarUDFImpl for ToTimestampMillisFunc {
439 fn as_any(&self) -> &dyn Any {
440 self
441 }
442
443 fn name(&self) -> &str {
444 "to_timestamp_millis"
445 }
446
447 fn signature(&self) -> &Signature {
448 &self.signature
449 }
450
451 fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
452 Ok(return_type_for(&arg_types[0], Millisecond))
453 }
454
455 fn invoke_with_args(
456 &self,
457 args: datafusion_expr::ScalarFunctionArgs,
458 ) -> Result<ColumnarValue> {
459 let args = args.args;
460 if args.is_empty() {
461 return exec_err!(
462 "to_timestamp_millis function requires 1 or more arguments, got {}",
463 args.len()
464 );
465 }
466
467 if args.len() > 1 {
469 validate_data_types(&args, "to_timestamp")?;
470 }
471
472 match args[0].data_type() {
473 Null | Int32 | Int64 | Timestamp(_, None) => {
474 args[0].cast_to(&Timestamp(Millisecond, None), None)
475 }
476 Timestamp(_, Some(tz)) => {
477 args[0].cast_to(&Timestamp(Millisecond, Some(tz)), None)
478 }
479 Utf8View | LargeUtf8 | Utf8 => to_timestamp_impl::<TimestampMillisecondType>(
480 &args,
481 "to_timestamp_millis",
482 ),
483 other => {
484 exec_err!(
485 "Unsupported data type {:?} for function to_timestamp_millis",
486 other
487 )
488 }
489 }
490 }
491 fn documentation(&self) -> Option<&Documentation> {
492 self.doc()
493 }
494}
495
496impl ScalarUDFImpl for ToTimestampMicrosFunc {
497 fn as_any(&self) -> &dyn Any {
498 self
499 }
500
501 fn name(&self) -> &str {
502 "to_timestamp_micros"
503 }
504
505 fn signature(&self) -> &Signature {
506 &self.signature
507 }
508
509 fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
510 Ok(return_type_for(&arg_types[0], Microsecond))
511 }
512
513 fn invoke_with_args(
514 &self,
515 args: datafusion_expr::ScalarFunctionArgs,
516 ) -> Result<ColumnarValue> {
517 let args = args.args;
518 if args.is_empty() {
519 return exec_err!(
520 "to_timestamp_micros function requires 1 or more arguments, got {}",
521 args.len()
522 );
523 }
524
525 if args.len() > 1 {
527 validate_data_types(&args, "to_timestamp")?;
528 }
529
530 match args[0].data_type() {
531 Null | Int32 | Int64 | Timestamp(_, None) => {
532 args[0].cast_to(&Timestamp(Microsecond, None), None)
533 }
534 Timestamp(_, Some(tz)) => {
535 args[0].cast_to(&Timestamp(Microsecond, Some(tz)), None)
536 }
537 Utf8View | LargeUtf8 | Utf8 => to_timestamp_impl::<TimestampMicrosecondType>(
538 &args,
539 "to_timestamp_micros",
540 ),
541 other => {
542 exec_err!(
543 "Unsupported data type {:?} for function to_timestamp_micros",
544 other
545 )
546 }
547 }
548 }
549 fn documentation(&self) -> Option<&Documentation> {
550 self.doc()
551 }
552}
553
554impl ScalarUDFImpl for ToTimestampNanosFunc {
555 fn as_any(&self) -> &dyn Any {
556 self
557 }
558
559 fn name(&self) -> &str {
560 "to_timestamp_nanos"
561 }
562
563 fn signature(&self) -> &Signature {
564 &self.signature
565 }
566
567 fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
568 Ok(return_type_for(&arg_types[0], Nanosecond))
569 }
570
571 fn invoke_with_args(
572 &self,
573 args: datafusion_expr::ScalarFunctionArgs,
574 ) -> Result<ColumnarValue> {
575 let args = args.args;
576 if args.is_empty() {
577 return exec_err!(
578 "to_timestamp_nanos function requires 1 or more arguments, got {}",
579 args.len()
580 );
581 }
582
583 if args.len() > 1 {
585 validate_data_types(&args, "to_timestamp")?;
586 }
587
588 match args[0].data_type() {
589 Null | Int32 | Int64 | Timestamp(_, None) => {
590 args[0].cast_to(&Timestamp(Nanosecond, None), None)
591 }
592 Timestamp(_, Some(tz)) => {
593 args[0].cast_to(&Timestamp(Nanosecond, Some(tz)), None)
594 }
595 Utf8View | LargeUtf8 | Utf8 => {
596 to_timestamp_impl::<TimestampNanosecondType>(&args, "to_timestamp_nanos")
597 }
598 other => {
599 exec_err!(
600 "Unsupported data type {:?} for function to_timestamp_nanos",
601 other
602 )
603 }
604 }
605 }
606 fn documentation(&self) -> Option<&Documentation> {
607 self.doc()
608 }
609}
610
611fn return_type_for(arg: &DataType, unit: TimeUnit) -> DataType {
614 match arg {
615 Timestamp(_, Some(tz)) => Timestamp(unit, Some(Arc::clone(tz))),
616 _ => Timestamp(unit, None),
617 }
618}
619
620fn to_timestamp_impl<T: ArrowTimestampType + ScalarType<i64>>(
621 args: &[ColumnarValue],
622 name: &str,
623) -> Result<ColumnarValue> {
624 let factor = match T::UNIT {
625 Second => 1_000_000_000,
626 Millisecond => 1_000_000,
627 Microsecond => 1_000,
628 Nanosecond => 1,
629 };
630
631 match args.len() {
632 1 => handle::<T, _, T>(
633 args,
634 |s| string_to_timestamp_nanos_shim(s).map(|n| n / factor),
635 name,
636 ),
637 n if n >= 2 => handle_multiple::<T, _, T, _>(
638 args,
639 string_to_timestamp_nanos_formatted,
640 |n| n / factor,
641 name,
642 ),
643 _ => exec_err!("Unsupported 0 argument count for function {name}"),
644 }
645}
646
647#[cfg(test)]
648mod tests {
649 use std::sync::Arc;
650
651 use arrow::array::types::Int64Type;
652 use arrow::array::{
653 Array, PrimitiveArray, TimestampMicrosecondArray, TimestampMillisecondArray,
654 TimestampNanosecondArray, TimestampSecondArray,
655 };
656 use arrow::array::{ArrayRef, Int64Array, StringBuilder};
657 use arrow::datatypes::{Field, TimeUnit};
658 use chrono::Utc;
659 use datafusion_common::{assert_contains, DataFusionError, ScalarValue};
660 use datafusion_expr::ScalarFunctionImplementation;
661
662 use super::*;
663
664 fn to_timestamp(args: &[ColumnarValue]) -> Result<ColumnarValue> {
665 to_timestamp_impl::<TimestampNanosecondType>(args, "to_timestamp")
666 }
667
668 fn to_timestamp_millis(args: &[ColumnarValue]) -> Result<ColumnarValue> {
670 to_timestamp_impl::<TimestampMillisecondType>(args, "to_timestamp_millis")
671 }
672
673 fn to_timestamp_micros(args: &[ColumnarValue]) -> Result<ColumnarValue> {
675 to_timestamp_impl::<TimestampMicrosecondType>(args, "to_timestamp_micros")
676 }
677
678 fn to_timestamp_nanos(args: &[ColumnarValue]) -> Result<ColumnarValue> {
680 to_timestamp_impl::<TimestampNanosecondType>(args, "to_timestamp_nanos")
681 }
682
683 fn to_timestamp_seconds(args: &[ColumnarValue]) -> Result<ColumnarValue> {
685 to_timestamp_impl::<TimestampSecondType>(args, "to_timestamp_seconds")
686 }
687
688 #[test]
689 fn to_timestamp_arrays_and_nulls() -> Result<()> {
690 let mut string_builder = StringBuilder::with_capacity(2, 1024);
693 let mut ts_builder = TimestampNanosecondArray::builder(2);
694
695 string_builder.append_value("2020-09-08T13:42:29.190855");
696 ts_builder.append_value(1599572549190855000);
697
698 string_builder.append_null();
699 ts_builder.append_null();
700 let expected_timestamps = &ts_builder.finish() as &dyn Array;
701
702 let string_array =
703 ColumnarValue::Array(Arc::new(string_builder.finish()) as ArrayRef);
704 let parsed_timestamps = to_timestamp(&[string_array])
705 .expect("that to_timestamp parsed values without error");
706 if let ColumnarValue::Array(parsed_array) = parsed_timestamps {
707 assert_eq!(parsed_array.len(), 2);
708 assert_eq!(expected_timestamps, parsed_array.as_ref());
709 } else {
710 panic!("Expected a columnar array")
711 }
712 Ok(())
713 }
714
715 #[test]
716 fn to_timestamp_with_formats_arrays_and_nulls() -> Result<()> {
717 let mut date_string_builder = StringBuilder::with_capacity(2, 1024);
720 let mut format1_builder = StringBuilder::with_capacity(2, 1024);
721 let mut format2_builder = StringBuilder::with_capacity(2, 1024);
722 let mut format3_builder = StringBuilder::with_capacity(2, 1024);
723 let mut ts_builder = TimestampNanosecondArray::builder(2);
724
725 date_string_builder.append_null();
726 format1_builder.append_null();
727 format2_builder.append_null();
728 format3_builder.append_null();
729 ts_builder.append_null();
730
731 date_string_builder.append_value("2020-09-08T13:42:29.19085Z");
732 format1_builder.append_value("%s");
733 format2_builder.append_value("%c");
734 format3_builder.append_value("%+");
735 ts_builder.append_value(1599572549190850000);
736
737 let expected_timestamps = &ts_builder.finish() as &dyn Array;
738
739 let string_array = [
740 ColumnarValue::Array(Arc::new(date_string_builder.finish()) as ArrayRef),
741 ColumnarValue::Array(Arc::new(format1_builder.finish()) as ArrayRef),
742 ColumnarValue::Array(Arc::new(format2_builder.finish()) as ArrayRef),
743 ColumnarValue::Array(Arc::new(format3_builder.finish()) as ArrayRef),
744 ];
745 let parsed_timestamps = to_timestamp(&string_array)
746 .expect("that to_timestamp with format args parsed values without error");
747 if let ColumnarValue::Array(parsed_array) = parsed_timestamps {
748 assert_eq!(parsed_array.len(), 2);
749 assert_eq!(expected_timestamps, parsed_array.as_ref());
750 } else {
751 panic!("Expected a columnar array")
752 }
753 Ok(())
754 }
755
756 #[test]
757 fn to_timestamp_invalid_input_type() -> Result<()> {
758 let mut builder = Int64Array::builder(1);
762 builder.append_value(1);
763 let int64array = ColumnarValue::Array(Arc::new(builder.finish()));
764
765 let expected_err =
766 "Execution error: Unsupported data type Int64 for function to_timestamp";
767 match to_timestamp(&[int64array]) {
768 Ok(_) => panic!("Expected error but got success"),
769 Err(e) => {
770 assert!(
771 e.to_string().contains(expected_err),
772 "Can not find expected error '{expected_err}'. Actual error '{e}'"
773 );
774 }
775 }
776 Ok(())
777 }
778
779 #[test]
780 fn to_timestamp_with_formats_invalid_input_type() -> Result<()> {
781 let mut builder = Int64Array::builder(1);
785 builder.append_value(1);
786 let int64array = [
787 ColumnarValue::Array(Arc::new(builder.finish())),
788 ColumnarValue::Array(Arc::new(builder.finish())),
789 ];
790
791 let expected_err =
792 "Execution error: Unsupported data type Int64 for function to_timestamp";
793 match to_timestamp(&int64array) {
794 Ok(_) => panic!("Expected error but got success"),
795 Err(e) => {
796 assert!(
797 e.to_string().contains(expected_err),
798 "Can not find expected error '{expected_err}'. Actual error '{e}'"
799 );
800 }
801 }
802 Ok(())
803 }
804
805 #[test]
806 fn to_timestamp_with_unparseable_data() -> Result<()> {
807 let mut date_string_builder = StringBuilder::with_capacity(2, 1024);
808
809 date_string_builder.append_null();
810
811 date_string_builder.append_value("2020-09-08 - 13:42:29.19085Z");
812
813 let string_array =
814 ColumnarValue::Array(Arc::new(date_string_builder.finish()) as ArrayRef);
815
816 let expected_err =
817 "Arrow error: Parser error: Error parsing timestamp from '2020-09-08 - 13:42:29.19085Z': error parsing time";
818 match to_timestamp(&[string_array]) {
819 Ok(_) => panic!("Expected error but got success"),
820 Err(e) => {
821 assert!(
822 e.to_string().contains(expected_err),
823 "Can not find expected error '{expected_err}'. Actual error '{e}'"
824 );
825 }
826 }
827 Ok(())
828 }
829
830 #[test]
831 fn to_timestamp_with_invalid_tz() -> Result<()> {
832 let mut date_string_builder = StringBuilder::with_capacity(2, 1024);
833
834 date_string_builder.append_null();
835
836 date_string_builder.append_value("2020-09-08T13:42:29ZZ");
837
838 let string_array =
839 ColumnarValue::Array(Arc::new(date_string_builder.finish()) as ArrayRef);
840
841 let expected_err =
842 "Arrow error: Parser error: Invalid timezone \"ZZ\": failed to parse timezone";
843 match to_timestamp(&[string_array]) {
844 Ok(_) => panic!("Expected error but got success"),
845 Err(e) => {
846 assert!(
847 e.to_string().contains(expected_err),
848 "Can not find expected error '{expected_err}'. Actual error '{e}'"
849 );
850 }
851 }
852 Ok(())
853 }
854
855 #[test]
856 fn to_timestamp_with_no_matching_formats() -> Result<()> {
857 let mut date_string_builder = StringBuilder::with_capacity(2, 1024);
858 let mut format1_builder = StringBuilder::with_capacity(2, 1024);
859 let mut format2_builder = StringBuilder::with_capacity(2, 1024);
860 let mut format3_builder = StringBuilder::with_capacity(2, 1024);
861
862 date_string_builder.append_null();
863 format1_builder.append_null();
864 format2_builder.append_null();
865 format3_builder.append_null();
866
867 date_string_builder.append_value("2020-09-08T13:42:29.19085Z");
868 format1_builder.append_value("%s");
869 format2_builder.append_value("%c");
870 format3_builder.append_value("%H:%M:%S");
871
872 let string_array = [
873 ColumnarValue::Array(Arc::new(date_string_builder.finish()) as ArrayRef),
874 ColumnarValue::Array(Arc::new(format1_builder.finish()) as ArrayRef),
875 ColumnarValue::Array(Arc::new(format2_builder.finish()) as ArrayRef),
876 ColumnarValue::Array(Arc::new(format3_builder.finish()) as ArrayRef),
877 ];
878
879 let expected_err =
880 "Execution error: Error parsing timestamp from '2020-09-08T13:42:29.19085Z' using format '%H:%M:%S': input contains invalid characters";
881 match to_timestamp(&string_array) {
882 Ok(_) => panic!("Expected error but got success"),
883 Err(e) => {
884 assert!(
885 e.to_string().contains(expected_err),
886 "Can not find expected error '{expected_err}'. Actual error '{e}'"
887 );
888 }
889 }
890 Ok(())
891 }
892
893 #[test]
894 fn string_to_timestamp_formatted() {
895 assert_eq!(
897 1599572549190855000,
898 parse_timestamp_formatted("2020-09-08T13:42:29.190855+00:00", "%+").unwrap()
899 );
900 assert_eq!(
901 1599572549190855000,
902 parse_timestamp_formatted("2020-09-08T13:42:29.190855Z", "%+").unwrap()
903 );
904 assert_eq!(
905 1599572549000000000,
906 parse_timestamp_formatted("2020-09-08T13:42:29Z", "%+").unwrap()
907 ); assert_eq!(
909 1599590549190855000,
910 parse_timestamp_formatted("2020-09-08T13:42:29.190855-05:00", "%+").unwrap()
911 );
912 assert_eq!(
913 1599590549000000000,
914 parse_timestamp_formatted("1599590549", "%s").unwrap()
915 );
916 assert_eq!(
917 1599572549000000000,
918 parse_timestamp_formatted("09-08-2020 13/42/29", "%m-%d-%Y %H/%M/%S")
919 .unwrap()
920 );
921 assert_eq!(
922 1642896000000000000,
923 parse_timestamp_formatted("2022-01-23", "%Y-%m-%d").unwrap()
924 );
925 }
926
927 fn parse_timestamp_formatted(s: &str, format: &str) -> Result<i64, DataFusionError> {
928 let result = string_to_timestamp_nanos_formatted(s, format);
929 if let Err(e) = &result {
930 eprintln!("Error parsing timestamp '{s}' using format '{format}': {e:?}");
931 }
932 result
933 }
934
#[test]
fn string_to_timestamp_formatted_invalid() {
    // Input reused by the cases whose format string is what makes parsing fail.
    const GMT_INPUT: &str = "Wed, 18 Feb 2015 23:16:09 GMT";

    // Each row: (input, format string, reason expected at the end of the error).
    let cases = [
        ("", "%Y%m%d %H%M%S", "premature end of input"),
        ("SS", "%c", "premature end of input"),
        (GMT_INPUT, "", "trailing input"),
        (GMT_INPUT, "%XX", "input contains invalid characters"),
        (GMT_INPUT, "%Y%m%d %H%M%S", "input contains invalid characters"),
    ];

    for (s, f, ctx) in cases {
        // Parsing must fail; the rendered error has to match the full message.
        let err = string_to_datetime_formatted(&Utc, s, f).unwrap_err();
        let actual = err.to_string();
        let expected = format!("Execution error: Error parsing timestamp from '{s}' using format '{f}': {ctx}");
        assert_eq!(actual, expected)
    }
}
962
#[test]
fn string_to_timestamp_invalid_arguments() {
    // NOTE(review): the case table and assertions below are the same as in
    // `string_to_timestamp_formatted_invalid`; confirm whether this test was
    // meant to cover a different set of arguments.
    let full_date = "Wed, 18 Feb 2015 23:16:09 GMT";

    // (input, format string, reason expected at the end of the error message)
    let cases = [
        ("", "%Y%m%d %H%M%S", "premature end of input"),
        ("SS", "%c", "premature end of input"),
        (full_date, "", "trailing input"),
        (full_date, "%XX", "input contains invalid characters"),
        (full_date, "%Y%m%d %H%M%S", "input contains invalid characters"),
    ];

    cases.iter().for_each(|&(s, f, ctx)| {
        let expected = format!("Execution error: Error parsing timestamp from '{s}' using format '{f}': {ctx}");
        // Every case must be rejected by the parser.
        let failure = string_to_datetime_formatted(&Utc, s, f).unwrap_err();
        assert_eq!(failure.to_string(), expected)
    });
}
990
#[test]
fn test_tz() {
    // One UDF per conversion function, each listed exactly once.
    let udfs: Vec<Box<dyn ScalarUDFImpl>> = vec![
        Box::new(ToTimestampFunc::new()),
        Box::new(ToTimestampSecondsFunc::new()),
        Box::new(ToTimestampMillisFunc::new()),
        Box::new(ToTimestampMicrosFunc::new()),
        Box::new(ToTimestampNanosFunc::new()),
    ];

    // Part 1: inputs that carry a "UTC" time zone must yield a return type and
    // a result array that both keep a time zone.
    let mut nanos_builder = TimestampNanosecondArray::builder(2);
    let mut millis_builder = TimestampMillisecondArray::builder(2);
    let mut micros_builder = TimestampMicrosecondArray::builder(2);
    let mut sec_builder = TimestampSecondArray::builder(2);

    nanos_builder.append_value(1599572549190850000);
    millis_builder.append_value(1599572549190);
    micros_builder.append_value(1599572549190850);
    sec_builder.append_value(1599572549);

    let nanos_timestamps =
        Arc::new(nanos_builder.finish().with_timezone("UTC")) as ArrayRef;
    let millis_timestamps =
        Arc::new(millis_builder.finish().with_timezone("UTC")) as ArrayRef;
    let micros_timestamps =
        Arc::new(micros_builder.finish().with_timezone("UTC")) as ArrayRef;
    let sec_timestamps =
        Arc::new(sec_builder.finish().with_timezone("UTC")) as ArrayRef;

    let arrays = &[
        ColumnarValue::Array(Arc::clone(&nanos_timestamps)),
        ColumnarValue::Array(Arc::clone(&millis_timestamps)),
        ColumnarValue::Array(Arc::clone(&micros_timestamps)),
        ColumnarValue::Array(Arc::clone(&sec_timestamps)),
    ];

    for udf in &udfs {
        for array in arrays {
            let arg_type = array.data_type();
            let rt = udf.return_type(&[arg_type.clone()]).unwrap();
            let arg_field = Field::new("arg", arg_type, true).into();
            assert!(matches!(rt, Timestamp(_, Some(_))));
            let args = datafusion_expr::ScalarFunctionArgs {
                args: vec![array.clone()],
                arg_fields: vec![arg_field],
                // Every input array above holds exactly one value.
                number_rows: 1,
                return_field: Field::new("f", rt, true).into(),
            };
            let res = udf
                .invoke_with_args(args)
                .expect("that to_timestamp parsed values without error");
            let array = match res {
                ColumnarValue::Array(res) => res,
                _ => panic!("Expected a columnar array"),
            };
            let ty = array.data_type();
            assert!(matches!(ty, Timestamp(_, Some(_))));
        }
    }

    // Part 2: inputs without a time zone (plus a plain Int64 array) must yield
    // a return type and a result array without a time zone.
    let mut nanos_builder = TimestampNanosecondArray::builder(2);
    let mut millis_builder = TimestampMillisecondArray::builder(2);
    let mut micros_builder = TimestampMicrosecondArray::builder(2);
    let mut sec_builder = TimestampSecondArray::builder(2);
    let mut i64_builder = Int64Array::builder(2);

    nanos_builder.append_value(1599572549190850000);
    millis_builder.append_value(1599572549190);
    micros_builder.append_value(1599572549190850);
    sec_builder.append_value(1599572549);
    i64_builder.append_value(1599572549);

    let nanos_timestamps = Arc::new(nanos_builder.finish()) as ArrayRef;
    let millis_timestamps = Arc::new(millis_builder.finish()) as ArrayRef;
    let micros_timestamps = Arc::new(micros_builder.finish()) as ArrayRef;
    let sec_timestamps = Arc::new(sec_builder.finish()) as ArrayRef;
    let i64_timestamps = Arc::new(i64_builder.finish()) as ArrayRef;

    let arrays = &[
        ColumnarValue::Array(Arc::clone(&nanos_timestamps)),
        ColumnarValue::Array(Arc::clone(&millis_timestamps)),
        ColumnarValue::Array(Arc::clone(&micros_timestamps)),
        ColumnarValue::Array(Arc::clone(&sec_timestamps)),
        ColumnarValue::Array(Arc::clone(&i64_timestamps)),
    ];

    for udf in &udfs {
        for array in arrays {
            let arg_type = array.data_type();
            let rt = udf.return_type(&[arg_type.clone()]).unwrap();
            assert!(matches!(rt, Timestamp(_, None)));
            let arg_field = Field::new("arg", arg_type, true).into();
            let args = datafusion_expr::ScalarFunctionArgs {
                args: vec![array.clone()],
                arg_fields: vec![arg_field],
                // Every input array above holds exactly one value.
                number_rows: 1,
                return_field: Field::new("f", rt, true).into(),
            };
            let res = udf
                .invoke_with_args(args)
                .expect("that to_timestamp parsed values without error");
            let array = match res {
                ColumnarValue::Array(res) => res,
                _ => panic!("Expected a columnar array"),
            };
            let ty = array.data_type();
            assert!(matches!(ty, Timestamp(_, None)));
        }
    }
}
1099
#[test]
fn test_to_timestamp_arg_validation() {
    // Single-row string input shared by every scenario below.
    let mut date_string_builder = StringBuilder::with_capacity(2, 1024);
    date_string_builder.append_value("2020-09-08T13:42:29.19085Z");

    let data = date_string_builder.finish();

    // Each conversion function paired with the time unit of the array it is
    // expected to return.
    let funcs: Vec<(ScalarFunctionImplementation, TimeUnit)> = vec![
        (Arc::new(to_timestamp), Nanosecond),
        (Arc::new(to_timestamp_micros), Microsecond),
        (Arc::new(to_timestamp_millis), Millisecond),
        (Arc::new(to_timestamp_nanos), Nanosecond),
        (Arc::new(to_timestamp_seconds), Second),
    ];

    let mut nanos_builder = TimestampNanosecondArray::builder(2);
    let mut millis_builder = TimestampMillisecondArray::builder(2);
    let mut micros_builder = TimestampMicrosecondArray::builder(2);
    let mut sec_builder = TimestampSecondArray::builder(2);

    nanos_builder.append_value(1599572549190850000);
    millis_builder.append_value(1599572549190);
    micros_builder.append_value(1599572549190850);
    sec_builder.append_value(1599572549);

    let nanos_expected_timestamps = &nanos_builder.finish() as &dyn Array;
    let millis_expected_timestamps = &millis_builder.finish() as &dyn Array;
    let micros_expected_timestamps = &micros_builder.finish() as &dyn Array;
    let sec_expected_timestamps = &sec_builder.finish() as &dyn Array;

    for (func, time_unit) in funcs {
        // Expected output for this function, selected once by its time unit.
        let expected_timestamps = match time_unit {
            Nanosecond => nanos_expected_timestamps,
            Millisecond => millis_expected_timestamps,
            Microsecond => micros_expected_timestamps,
            Second => sec_expected_timestamps,
        };

        // Valid format arguments, supplied first as Utf8 and then as LargeUtf8
        // scalars: both must parse the input into the expected array.
        let format_scalars: [fn(Option<String>) -> ScalarValue; 2] =
            [ScalarValue::Utf8, ScalarValue::LargeUtf8];
        for to_scalar in format_scalars {
            let string_array = [
                ColumnarValue::Array(Arc::new(data.clone()) as ArrayRef),
                ColumnarValue::Scalar(to_scalar(Some("%s".to_string()))),
                ColumnarValue::Scalar(to_scalar(Some("%c".to_string()))),
                ColumnarValue::Scalar(to_scalar(Some("%+".to_string()))),
            ];
            let parsed_timestamps = func(&string_array)
                .expect("that to_timestamp with format args parsed values without error");
            let ColumnarValue::Array(parsed_array) = parsed_timestamps else {
                panic!("Expected a columnar array")
            };
            assert_eq!(parsed_array.len(), 1);
            assert!(matches!(parsed_array.data_type(), Timestamp(_, None)));
            assert_eq!(expected_timestamps, parsed_array.as_ref());
        }

        // Int32 scalars in the format positions must be rejected.
        let string_array = [
            ColumnarValue::Array(Arc::new(data.clone()) as ArrayRef),
            ColumnarValue::Scalar(ScalarValue::Int32(Some(1))),
            ColumnarValue::Scalar(ScalarValue::Int32(Some(2))),
            ColumnarValue::Scalar(ScalarValue::Int32(Some(3))),
        ];

        let expected = "Unsupported data type Int32 for function".to_string();
        let actual = func(&string_array).unwrap_err().to_string();
        assert_contains!(actual, expected);

        // An Int64 array in the format position must be rejected as well.
        let string_array = [
            ColumnarValue::Array(Arc::new(data.clone()) as ArrayRef),
            ColumnarValue::Array(Arc::new(PrimitiveArray::<Int64Type>::new(
                vec![1i64].into(),
                None,
            )) as ArrayRef),
        ];

        let expected = "Unsupported data type".to_string();
        let actual = func(&string_array).unwrap_err().to_string();
        assert_contains!(actual, expected);
    }
}
1217}