1use std::any::Any;
19use std::sync::Arc;
20
21use crate::datetime::common::*;
22use arrow::array::Float64Array;
23use arrow::datatypes::DataType::*;
24use arrow::datatypes::TimeUnit::{Microsecond, Millisecond, Nanosecond, Second};
25use arrow::datatypes::{
26 ArrowTimestampType, DataType, TimeUnit, TimestampMicrosecondType,
27 TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType,
28};
29use datafusion_common::format::DEFAULT_CAST_OPTIONS;
30use datafusion_common::{exec_err, Result, ScalarType, ScalarValue};
31use datafusion_expr::{
32 ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
33};
34use datafusion_macros::user_doc;
35
36#[user_doc(
37 doc_section(label = "Time and Date Functions"),
38 description = r#"
39Converts a value to a timestamp (`YYYY-MM-DDT00:00:00Z`). Supports strings, integer, unsigned integer, and double types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono formats] are provided. Integers, unsigned integers, and doubles are interpreted as seconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp.
40
41Note: `to_timestamp` returns `Timestamp(Nanosecond)`. The supported range for integer input is between `-9223372037` and `9223372036`. Supported range for string input is between `1677-09-21T00:12:44.0` and `2262-04-11T23:47:16.0`. Please use `to_timestamp_seconds` for the input outside of supported bounds.
42"#,
43 syntax_example = "to_timestamp(expression[, ..., format_n])",
44 sql_example = r#"```sql
45> select to_timestamp('2023-01-31T09:26:56.123456789-05:00');
46+-----------------------------------------------------------+
47| to_timestamp(Utf8("2023-01-31T09:26:56.123456789-05:00")) |
48+-----------------------------------------------------------+
49| 2023-01-31T14:26:56.123456789 |
50+-----------------------------------------------------------+
51> select to_timestamp('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y');
52+--------------------------------------------------------------------------------------------------------+
53| to_timestamp(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) |
54+--------------------------------------------------------------------------------------------------------+
55| 2023-05-17T03:59:00.123456789 |
56+--------------------------------------------------------------------------------------------------------+
57```
58Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/date_time_functions.rs)
59"#,
60 argument(
61 name = "expression",
62 description = "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators."
63 ),
64 argument(
65 name = "format_n",
66 description = "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned."
67 )
68)]
69#[derive(Debug, PartialEq, Eq, Hash)]
70pub struct ToTimestampFunc {
71 signature: Signature,
72}
73
74#[user_doc(
75 doc_section(label = "Time and Date Functions"),
76 description = "Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000Z`). Supports strings, integer, and unsigned integer types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)s are provided. Integers and unsigned integers are interpreted as seconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp.",
77 syntax_example = "to_timestamp_seconds(expression[, ..., format_n])",
78 sql_example = r#"```sql
79> select to_timestamp_seconds('2023-01-31T09:26:56.123456789-05:00');
80+-------------------------------------------------------------------+
81| to_timestamp_seconds(Utf8("2023-01-31T09:26:56.123456789-05:00")) |
82+-------------------------------------------------------------------+
83| 2023-01-31T14:26:56 |
84+-------------------------------------------------------------------+
85> select to_timestamp_seconds('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y');
86+----------------------------------------------------------------------------------------------------------------+
87| to_timestamp_seconds(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) |
88+----------------------------------------------------------------------------------------------------------------+
89| 2023-05-17T03:59:00 |
90+----------------------------------------------------------------------------------------------------------------+
91```
92Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/date_time_functions.rs)
93"#,
94 argument(
95 name = "expression",
96 description = "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators."
97 ),
98 argument(
99 name = "format_n",
100 description = "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned."
101 )
102)]
103#[derive(Debug, PartialEq, Eq, Hash)]
104pub struct ToTimestampSecondsFunc {
105 signature: Signature,
106}
107
108#[user_doc(
109 doc_section(label = "Time and Date Functions"),
110 description = "Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000Z`). Supports strings, integer, and unsigned integer types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono formats](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) are provided. Integers and unsigned integers are interpreted as milliseconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp.",
111 syntax_example = "to_timestamp_millis(expression[, ..., format_n])",
112 sql_example = r#"```sql
113> select to_timestamp_millis('2023-01-31T09:26:56.123456789-05:00');
114+------------------------------------------------------------------+
115| to_timestamp_millis(Utf8("2023-01-31T09:26:56.123456789-05:00")) |
116+------------------------------------------------------------------+
117| 2023-01-31T14:26:56.123 |
118+------------------------------------------------------------------+
119> select to_timestamp_millis('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y');
120+---------------------------------------------------------------------------------------------------------------+
121| to_timestamp_millis(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) |
122+---------------------------------------------------------------------------------------------------------------+
123| 2023-05-17T03:59:00.123 |
124+---------------------------------------------------------------------------------------------------------------+
125```
126Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/date_time_functions.rs)
127"#,
128 argument(
129 name = "expression",
130 description = "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators."
131 ),
132 argument(
133 name = "format_n",
134 description = "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned."
135 )
136)]
137#[derive(Debug, PartialEq, Eq, Hash)]
138pub struct ToTimestampMillisFunc {
139 signature: Signature,
140}
141
142#[user_doc(
143 doc_section(label = "Time and Date Functions"),
144 description = "Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000000Z`). Supports strings, integer, and unsigned integer types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)s are provided. Integers and unsigned integers are interpreted as microseconds since the unix epoch (`1970-01-01T00:00:00Z`) Returns the corresponding timestamp.",
145 syntax_example = "to_timestamp_micros(expression[, ..., format_n])",
146 sql_example = r#"```sql
147> select to_timestamp_micros('2023-01-31T09:26:56.123456789-05:00');
148+------------------------------------------------------------------+
149| to_timestamp_micros(Utf8("2023-01-31T09:26:56.123456789-05:00")) |
150+------------------------------------------------------------------+
151| 2023-01-31T14:26:56.123456 |
152+------------------------------------------------------------------+
153> select to_timestamp_micros('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y');
154+---------------------------------------------------------------------------------------------------------------+
155| to_timestamp_micros(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) |
156+---------------------------------------------------------------------------------------------------------------+
157| 2023-05-17T03:59:00.123456 |
158+---------------------------------------------------------------------------------------------------------------+
159```
160Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/date_time_functions.rs)
161"#,
162 argument(
163 name = "expression",
164 description = "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators."
165 ),
166 argument(
167 name = "format_n",
168 description = "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned."
169 )
170)]
171#[derive(Debug, PartialEq, Eq, Hash)]
172pub struct ToTimestampMicrosFunc {
173 signature: Signature,
174}
175
176#[user_doc(
177 doc_section(label = "Time and Date Functions"),
178 description = "Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000000000Z`). Supports strings, integer, and unsigned integer types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)s are provided. Integers and unsigned integers are interpreted as nanoseconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp.",
179 syntax_example = "to_timestamp_nanos(expression[, ..., format_n])",
180 sql_example = r#"```sql
181> select to_timestamp_nanos('2023-01-31T09:26:56.123456789-05:00');
182+-----------------------------------------------------------------+
183| to_timestamp_nanos(Utf8("2023-01-31T09:26:56.123456789-05:00")) |
184+-----------------------------------------------------------------+
185| 2023-01-31T14:26:56.123456789 |
186+-----------------------------------------------------------------+
187> select to_timestamp_nanos('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y');
188+--------------------------------------------------------------------------------------------------------------+
189| to_timestamp_nanos(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) |
190+--------------------------------------------------------------------------------------------------------------+
191| 2023-05-17T03:59:00.123456789 |
192+---------------------------------------------------------------------------------------------------------------+
193```
194Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/date_time_functions.rs)
195"#,
196 argument(
197 name = "expression",
198 description = "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators."
199 ),
200 argument(
201 name = "format_n",
202 description = "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned."
203 )
204)]
205#[derive(Debug, PartialEq, Eq, Hash)]
206pub struct ToTimestampNanosFunc {
207 signature: Signature,
208}
209
210impl Default for ToTimestampFunc {
211 fn default() -> Self {
212 Self::new()
213 }
214}
215
216impl ToTimestampFunc {
217 pub fn new() -> Self {
218 Self {
219 signature: Signature::variadic_any(Volatility::Immutable),
220 }
221 }
222}
223
224impl Default for ToTimestampSecondsFunc {
225 fn default() -> Self {
226 Self::new()
227 }
228}
229
230impl ToTimestampSecondsFunc {
231 pub fn new() -> Self {
232 Self {
233 signature: Signature::variadic_any(Volatility::Immutable),
234 }
235 }
236}
237
238impl Default for ToTimestampMillisFunc {
239 fn default() -> Self {
240 Self::new()
241 }
242}
243
244impl ToTimestampMillisFunc {
245 pub fn new() -> Self {
246 Self {
247 signature: Signature::variadic_any(Volatility::Immutable),
248 }
249 }
250}
251
252impl Default for ToTimestampMicrosFunc {
253 fn default() -> Self {
254 Self::new()
255 }
256}
257
258impl ToTimestampMicrosFunc {
259 pub fn new() -> Self {
260 Self {
261 signature: Signature::variadic_any(Volatility::Immutable),
262 }
263 }
264}
265
266impl Default for ToTimestampNanosFunc {
267 fn default() -> Self {
268 Self::new()
269 }
270}
271
272impl ToTimestampNanosFunc {
273 pub fn new() -> Self {
274 Self {
275 signature: Signature::variadic_any(Volatility::Immutable),
276 }
277 }
278}
279
280impl ScalarUDFImpl for ToTimestampFunc {
287 fn as_any(&self) -> &dyn Any {
288 self
289 }
290
291 fn name(&self) -> &str {
292 "to_timestamp"
293 }
294
295 fn signature(&self) -> &Signature {
296 &self.signature
297 }
298
299 fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
300 Ok(return_type_for(&arg_types[0], Nanosecond))
301 }
302
303 fn invoke_with_args(
304 &self,
305 args: datafusion_expr::ScalarFunctionArgs,
306 ) -> Result<ColumnarValue> {
307 let args = args.args;
308 if args.is_empty() {
309 return exec_err!(
310 "to_timestamp function requires 1 or more arguments, got {}",
311 args.len()
312 );
313 }
314
315 if args.len() > 1 {
317 validate_data_types(&args, "to_timestamp")?;
318 }
319
320 match args[0].data_type() {
321 Int32 | Int64 => args[0]
322 .cast_to(&Timestamp(Second, None), None)?
323 .cast_to(&Timestamp(Nanosecond, None), None),
324 Null | Timestamp(_, None) => {
325 args[0].cast_to(&Timestamp(Nanosecond, None), None)
326 }
327 Float64 => {
328 let rescaled = arrow::compute::kernels::numeric::mul(
329 &args[0].to_array(1)?,
330 &arrow::array::Scalar::new(Float64Array::from(vec![
331 1_000_000_000f64,
332 ])),
333 )?;
334 Ok(ColumnarValue::Array(arrow::compute::cast_with_options(
335 &rescaled,
336 &Timestamp(Nanosecond, None),
337 &DEFAULT_CAST_OPTIONS,
338 )?))
339 }
340 Timestamp(_, Some(tz)) => {
341 args[0].cast_to(&Timestamp(Nanosecond, Some(tz)), None)
342 }
343 Utf8View | LargeUtf8 | Utf8 => {
344 to_timestamp_impl::<TimestampNanosecondType>(&args, "to_timestamp")
345 }
346 Decimal128(_, _) => {
347 match &args[0] {
348 ColumnarValue::Scalar(ScalarValue::Decimal128(
349 Some(value),
350 _,
351 scale,
352 )) => {
353 let scale_factor = 10_i128.pow(*scale as u32);
355 let seconds = value / scale_factor;
356 let fraction = value % scale_factor;
357
358 let nanos = (fraction * 1_000_000_000) / scale_factor;
359
360 let timestamp_nanos = seconds * 1_000_000_000 + nanos;
361
362 Ok(ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(
363 Some(timestamp_nanos as i64),
364 None,
365 )))
366 }
367 _ => exec_err!("Invalid decimal value"),
368 }
369 }
370 other => {
371 exec_err!(
372 "Unsupported data type {:?} for function to_timestamp",
373 other
374 )
375 }
376 }
377 }
378 fn documentation(&self) -> Option<&Documentation> {
379 self.doc()
380 }
381}
382
383impl ScalarUDFImpl for ToTimestampSecondsFunc {
384 fn as_any(&self) -> &dyn Any {
385 self
386 }
387
388 fn name(&self) -> &str {
389 "to_timestamp_seconds"
390 }
391
392 fn signature(&self) -> &Signature {
393 &self.signature
394 }
395
396 fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
397 Ok(return_type_for(&arg_types[0], Second))
398 }
399
400 fn invoke_with_args(
401 &self,
402 args: datafusion_expr::ScalarFunctionArgs,
403 ) -> Result<ColumnarValue> {
404 let args = args.args;
405 if args.is_empty() {
406 return exec_err!(
407 "to_timestamp_seconds function requires 1 or more arguments, got {}",
408 args.len()
409 );
410 }
411
412 if args.len() > 1 {
414 validate_data_types(&args, "to_timestamp")?;
415 }
416
417 match args[0].data_type() {
418 Null | Int32 | Int64 | Timestamp(_, None) | Decimal128(_, _) => {
419 args[0].cast_to(&Timestamp(Second, None), None)
420 }
421 Timestamp(_, Some(tz)) => args[0].cast_to(&Timestamp(Second, Some(tz)), None),
422 Utf8View | LargeUtf8 | Utf8 => {
423 to_timestamp_impl::<TimestampSecondType>(&args, "to_timestamp_seconds")
424 }
425 other => {
426 exec_err!(
427 "Unsupported data type {:?} for function to_timestamp_seconds",
428 other
429 )
430 }
431 }
432 }
433 fn documentation(&self) -> Option<&Documentation> {
434 self.doc()
435 }
436}
437
438impl ScalarUDFImpl for ToTimestampMillisFunc {
439 fn as_any(&self) -> &dyn Any {
440 self
441 }
442
443 fn name(&self) -> &str {
444 "to_timestamp_millis"
445 }
446
447 fn signature(&self) -> &Signature {
448 &self.signature
449 }
450
451 fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
452 Ok(return_type_for(&arg_types[0], Millisecond))
453 }
454
455 fn invoke_with_args(
456 &self,
457 args: datafusion_expr::ScalarFunctionArgs,
458 ) -> Result<ColumnarValue> {
459 let args = args.args;
460 if args.is_empty() {
461 return exec_err!(
462 "to_timestamp_millis function requires 1 or more arguments, got {}",
463 args.len()
464 );
465 }
466
467 if args.len() > 1 {
469 validate_data_types(&args, "to_timestamp")?;
470 }
471
472 match args[0].data_type() {
473 Null | Int32 | Int64 | Timestamp(_, None) => {
474 args[0].cast_to(&Timestamp(Millisecond, None), None)
475 }
476 Timestamp(_, Some(tz)) => {
477 args[0].cast_to(&Timestamp(Millisecond, Some(tz)), None)
478 }
479 Utf8View | LargeUtf8 | Utf8 => to_timestamp_impl::<TimestampMillisecondType>(
480 &args,
481 "to_timestamp_millis",
482 ),
483 other => {
484 exec_err!(
485 "Unsupported data type {:?} for function to_timestamp_millis",
486 other
487 )
488 }
489 }
490 }
491 fn documentation(&self) -> Option<&Documentation> {
492 self.doc()
493 }
494}
495
496impl ScalarUDFImpl for ToTimestampMicrosFunc {
497 fn as_any(&self) -> &dyn Any {
498 self
499 }
500
501 fn name(&self) -> &str {
502 "to_timestamp_micros"
503 }
504
505 fn signature(&self) -> &Signature {
506 &self.signature
507 }
508
509 fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
510 Ok(return_type_for(&arg_types[0], Microsecond))
511 }
512
513 fn invoke_with_args(
514 &self,
515 args: datafusion_expr::ScalarFunctionArgs,
516 ) -> Result<ColumnarValue> {
517 let args = args.args;
518 if args.is_empty() {
519 return exec_err!(
520 "to_timestamp_micros function requires 1 or more arguments, got {}",
521 args.len()
522 );
523 }
524
525 if args.len() > 1 {
527 validate_data_types(&args, "to_timestamp")?;
528 }
529
530 match args[0].data_type() {
531 Null | Int32 | Int64 | Timestamp(_, None) => {
532 args[0].cast_to(&Timestamp(Microsecond, None), None)
533 }
534 Timestamp(_, Some(tz)) => {
535 args[0].cast_to(&Timestamp(Microsecond, Some(tz)), None)
536 }
537 Utf8View | LargeUtf8 | Utf8 => to_timestamp_impl::<TimestampMicrosecondType>(
538 &args,
539 "to_timestamp_micros",
540 ),
541 other => {
542 exec_err!(
543 "Unsupported data type {:?} for function to_timestamp_micros",
544 other
545 )
546 }
547 }
548 }
549 fn documentation(&self) -> Option<&Documentation> {
550 self.doc()
551 }
552}
553
554impl ScalarUDFImpl for ToTimestampNanosFunc {
555 fn as_any(&self) -> &dyn Any {
556 self
557 }
558
559 fn name(&self) -> &str {
560 "to_timestamp_nanos"
561 }
562
563 fn signature(&self) -> &Signature {
564 &self.signature
565 }
566
567 fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
568 Ok(return_type_for(&arg_types[0], Nanosecond))
569 }
570
571 fn invoke_with_args(
572 &self,
573 args: datafusion_expr::ScalarFunctionArgs,
574 ) -> Result<ColumnarValue> {
575 let args = args.args;
576 if args.is_empty() {
577 return exec_err!(
578 "to_timestamp_nanos function requires 1 or more arguments, got {}",
579 args.len()
580 );
581 }
582
583 if args.len() > 1 {
585 validate_data_types(&args, "to_timestamp")?;
586 }
587
588 match args[0].data_type() {
589 Null | Int32 | Int64 | Timestamp(_, None) => {
590 args[0].cast_to(&Timestamp(Nanosecond, None), None)
591 }
592 Timestamp(_, Some(tz)) => {
593 args[0].cast_to(&Timestamp(Nanosecond, Some(tz)), None)
594 }
595 Utf8View | LargeUtf8 | Utf8 => {
596 to_timestamp_impl::<TimestampNanosecondType>(&args, "to_timestamp_nanos")
597 }
598 other => {
599 exec_err!(
600 "Unsupported data type {:?} for function to_timestamp_nanos",
601 other
602 )
603 }
604 }
605 }
606 fn documentation(&self) -> Option<&Documentation> {
607 self.doc()
608 }
609}
610
611fn return_type_for(arg: &DataType, unit: TimeUnit) -> DataType {
614 match arg {
615 Timestamp(_, Some(tz)) => Timestamp(unit, Some(Arc::clone(tz))),
616 _ => Timestamp(unit, None),
617 }
618}
619
620fn to_timestamp_impl<T: ArrowTimestampType + ScalarType<i64>>(
621 args: &[ColumnarValue],
622 name: &str,
623) -> Result<ColumnarValue> {
624 let factor = match T::UNIT {
625 Second => 1_000_000_000,
626 Millisecond => 1_000_000,
627 Microsecond => 1_000,
628 Nanosecond => 1,
629 };
630
631 match args.len() {
632 1 => handle::<T, _, T>(
633 args,
634 |s| string_to_timestamp_nanos_shim(s).map(|n| n / factor),
635 name,
636 ),
637 n if n >= 2 => handle_multiple::<T, _, T, _>(
638 args,
639 string_to_timestamp_nanos_formatted,
640 |n| n / factor,
641 name,
642 ),
643 _ => exec_err!("Unsupported 0 argument count for function {name}"),
644 }
645}
646
647#[cfg(test)]
648mod tests {
649 use std::sync::Arc;
650
651 use arrow::array::types::Int64Type;
652 use arrow::array::{
653 Array, PrimitiveArray, TimestampMicrosecondArray, TimestampMillisecondArray,
654 TimestampNanosecondArray, TimestampSecondArray,
655 };
656 use arrow::array::{ArrayRef, Int64Array, StringBuilder};
657 use arrow::datatypes::{Field, TimeUnit};
658 use chrono::Utc;
659 use datafusion_common::config::ConfigOptions;
660 use datafusion_common::{assert_contains, DataFusionError, ScalarValue};
661 use datafusion_expr::ScalarFunctionImplementation;
662
663 use super::*;
664
665 fn to_timestamp(args: &[ColumnarValue]) -> Result<ColumnarValue> {
666 to_timestamp_impl::<TimestampNanosecondType>(args, "to_timestamp")
667 }
668
669 fn to_timestamp_millis(args: &[ColumnarValue]) -> Result<ColumnarValue> {
671 to_timestamp_impl::<TimestampMillisecondType>(args, "to_timestamp_millis")
672 }
673
674 fn to_timestamp_micros(args: &[ColumnarValue]) -> Result<ColumnarValue> {
676 to_timestamp_impl::<TimestampMicrosecondType>(args, "to_timestamp_micros")
677 }
678
679 fn to_timestamp_nanos(args: &[ColumnarValue]) -> Result<ColumnarValue> {
681 to_timestamp_impl::<TimestampNanosecondType>(args, "to_timestamp_nanos")
682 }
683
684 fn to_timestamp_seconds(args: &[ColumnarValue]) -> Result<ColumnarValue> {
686 to_timestamp_impl::<TimestampSecondType>(args, "to_timestamp_seconds")
687 }
688
689 #[test]
690 fn to_timestamp_arrays_and_nulls() -> Result<()> {
691 let mut string_builder = StringBuilder::with_capacity(2, 1024);
694 let mut ts_builder = TimestampNanosecondArray::builder(2);
695
696 string_builder.append_value("2020-09-08T13:42:29.190855");
697 ts_builder.append_value(1599572549190855000);
698
699 string_builder.append_null();
700 ts_builder.append_null();
701 let expected_timestamps = &ts_builder.finish() as &dyn Array;
702
703 let string_array =
704 ColumnarValue::Array(Arc::new(string_builder.finish()) as ArrayRef);
705 let parsed_timestamps = to_timestamp(&[string_array])
706 .expect("that to_timestamp parsed values without error");
707 if let ColumnarValue::Array(parsed_array) = parsed_timestamps {
708 assert_eq!(parsed_array.len(), 2);
709 assert_eq!(expected_timestamps, parsed_array.as_ref());
710 } else {
711 panic!("Expected a columnar array")
712 }
713 Ok(())
714 }
715
716 #[test]
717 fn to_timestamp_with_formats_arrays_and_nulls() -> Result<()> {
718 let mut date_string_builder = StringBuilder::with_capacity(2, 1024);
721 let mut format1_builder = StringBuilder::with_capacity(2, 1024);
722 let mut format2_builder = StringBuilder::with_capacity(2, 1024);
723 let mut format3_builder = StringBuilder::with_capacity(2, 1024);
724 let mut ts_builder = TimestampNanosecondArray::builder(2);
725
726 date_string_builder.append_null();
727 format1_builder.append_null();
728 format2_builder.append_null();
729 format3_builder.append_null();
730 ts_builder.append_null();
731
732 date_string_builder.append_value("2020-09-08T13:42:29.19085Z");
733 format1_builder.append_value("%s");
734 format2_builder.append_value("%c");
735 format3_builder.append_value("%+");
736 ts_builder.append_value(1599572549190850000);
737
738 let expected_timestamps = &ts_builder.finish() as &dyn Array;
739
740 let string_array = [
741 ColumnarValue::Array(Arc::new(date_string_builder.finish()) as ArrayRef),
742 ColumnarValue::Array(Arc::new(format1_builder.finish()) as ArrayRef),
743 ColumnarValue::Array(Arc::new(format2_builder.finish()) as ArrayRef),
744 ColumnarValue::Array(Arc::new(format3_builder.finish()) as ArrayRef),
745 ];
746 let parsed_timestamps = to_timestamp(&string_array)
747 .expect("that to_timestamp with format args parsed values without error");
748 if let ColumnarValue::Array(parsed_array) = parsed_timestamps {
749 assert_eq!(parsed_array.len(), 2);
750 assert_eq!(expected_timestamps, parsed_array.as_ref());
751 } else {
752 panic!("Expected a columnar array")
753 }
754 Ok(())
755 }
756
757 #[test]
758 fn to_timestamp_invalid_input_type() -> Result<()> {
759 let mut builder = Int64Array::builder(1);
763 builder.append_value(1);
764 let int64array = ColumnarValue::Array(Arc::new(builder.finish()));
765
766 let expected_err =
767 "Execution error: Unsupported data type Int64 for function to_timestamp";
768 match to_timestamp(&[int64array]) {
769 Ok(_) => panic!("Expected error but got success"),
770 Err(e) => {
771 assert!(
772 e.to_string().contains(expected_err),
773 "Can not find expected error '{expected_err}'. Actual error '{e}'"
774 );
775 }
776 }
777 Ok(())
778 }
779
780 #[test]
781 fn to_timestamp_with_formats_invalid_input_type() -> Result<()> {
782 let mut builder = Int64Array::builder(1);
786 builder.append_value(1);
787 let int64array = [
788 ColumnarValue::Array(Arc::new(builder.finish())),
789 ColumnarValue::Array(Arc::new(builder.finish())),
790 ];
791
792 let expected_err =
793 "Execution error: Unsupported data type Int64 for function to_timestamp";
794 match to_timestamp(&int64array) {
795 Ok(_) => panic!("Expected error but got success"),
796 Err(e) => {
797 assert!(
798 e.to_string().contains(expected_err),
799 "Can not find expected error '{expected_err}'. Actual error '{e}'"
800 );
801 }
802 }
803 Ok(())
804 }
805
806 #[test]
807 fn to_timestamp_with_unparsable_data() -> Result<()> {
808 let mut date_string_builder = StringBuilder::with_capacity(2, 1024);
809
810 date_string_builder.append_null();
811
812 date_string_builder.append_value("2020-09-08 - 13:42:29.19085Z");
813
814 let string_array =
815 ColumnarValue::Array(Arc::new(date_string_builder.finish()) as ArrayRef);
816
817 let expected_err =
818 "Arrow error: Parser error: Error parsing timestamp from '2020-09-08 - 13:42:29.19085Z': error parsing time";
819 match to_timestamp(&[string_array]) {
820 Ok(_) => panic!("Expected error but got success"),
821 Err(e) => {
822 assert!(
823 e.to_string().contains(expected_err),
824 "Can not find expected error '{expected_err}'. Actual error '{e}'"
825 );
826 }
827 }
828 Ok(())
829 }
830
831 #[test]
832 fn to_timestamp_with_invalid_tz() -> Result<()> {
833 let mut date_string_builder = StringBuilder::with_capacity(2, 1024);
834
835 date_string_builder.append_null();
836
837 date_string_builder.append_value("2020-09-08T13:42:29ZZ");
838
839 let string_array =
840 ColumnarValue::Array(Arc::new(date_string_builder.finish()) as ArrayRef);
841
842 let expected_err =
843 "Arrow error: Parser error: Invalid timezone \"ZZ\": failed to parse timezone";
844 match to_timestamp(&[string_array]) {
845 Ok(_) => panic!("Expected error but got success"),
846 Err(e) => {
847 assert!(
848 e.to_string().contains(expected_err),
849 "Can not find expected error '{expected_err}'. Actual error '{e}'"
850 );
851 }
852 }
853 Ok(())
854 }
855
856 #[test]
857 fn to_timestamp_with_no_matching_formats() -> Result<()> {
858 let mut date_string_builder = StringBuilder::with_capacity(2, 1024);
859 let mut format1_builder = StringBuilder::with_capacity(2, 1024);
860 let mut format2_builder = StringBuilder::with_capacity(2, 1024);
861 let mut format3_builder = StringBuilder::with_capacity(2, 1024);
862
863 date_string_builder.append_null();
864 format1_builder.append_null();
865 format2_builder.append_null();
866 format3_builder.append_null();
867
868 date_string_builder.append_value("2020-09-08T13:42:29.19085Z");
869 format1_builder.append_value("%s");
870 format2_builder.append_value("%c");
871 format3_builder.append_value("%H:%M:%S");
872
873 let string_array = [
874 ColumnarValue::Array(Arc::new(date_string_builder.finish()) as ArrayRef),
875 ColumnarValue::Array(Arc::new(format1_builder.finish()) as ArrayRef),
876 ColumnarValue::Array(Arc::new(format2_builder.finish()) as ArrayRef),
877 ColumnarValue::Array(Arc::new(format3_builder.finish()) as ArrayRef),
878 ];
879
880 let expected_err =
881 "Execution error: Error parsing timestamp from '2020-09-08T13:42:29.19085Z' using format '%H:%M:%S': input contains invalid characters";
882 match to_timestamp(&string_array) {
883 Ok(_) => panic!("Expected error but got success"),
884 Err(e) => {
885 assert!(
886 e.to_string().contains(expected_err),
887 "Can not find expected error '{expected_err}'. Actual error '{e}'"
888 );
889 }
890 }
891 Ok(())
892 }
893
894 #[test]
895 fn string_to_timestamp_formatted() {
896 assert_eq!(
898 1599572549190855000,
899 parse_timestamp_formatted("2020-09-08T13:42:29.190855+00:00", "%+").unwrap()
900 );
901 assert_eq!(
902 1599572549190855000,
903 parse_timestamp_formatted("2020-09-08T13:42:29.190855Z", "%+").unwrap()
904 );
905 assert_eq!(
906 1599572549000000000,
907 parse_timestamp_formatted("2020-09-08T13:42:29Z", "%+").unwrap()
908 ); assert_eq!(
910 1599590549190855000,
911 parse_timestamp_formatted("2020-09-08T13:42:29.190855-05:00", "%+").unwrap()
912 );
913 assert_eq!(
914 1599590549000000000,
915 parse_timestamp_formatted("1599590549", "%s").unwrap()
916 );
917 assert_eq!(
918 1599572549000000000,
919 parse_timestamp_formatted("09-08-2020 13/42/29", "%m-%d-%Y %H/%M/%S")
920 .unwrap()
921 );
922 assert_eq!(
923 1642896000000000000,
924 parse_timestamp_formatted("2022-01-23", "%Y-%m-%d").unwrap()
925 );
926 }
927
928 fn parse_timestamp_formatted(s: &str, format: &str) -> Result<i64, DataFusionError> {
929 let result = string_to_timestamp_nanos_formatted(s, format);
930 if let Err(e) = &result {
931 eprintln!("Error parsing timestamp '{s}' using format '{format}': {e:?}");
932 }
933 result
934 }
935
/// Verifies the exact error text produced by `string_to_datetime_formatted`
/// when the input string and the format string do not fit together.
#[test]
fn string_to_timestamp_formatted_invalid() {
    // Each case is (input, format, trailing part of the expected error).
    let cases = [
        ("", "%Y%m%d %H%M%S", "premature end of input"),
        ("SS", "%c", "premature end of input"),
        ("Wed, 18 Feb 2015 23:16:09 GMT", "", "trailing input"),
        (
            "Wed, 18 Feb 2015 23:16:09 GMT",
            "%XX",
            "input contains invalid characters",
        ),
        (
            "Wed, 18 Feb 2015 23:16:09 GMT",
            "%Y%m%d %H%M%S",
            "input contains invalid characters",
        ),
    ];

    cases.into_iter().for_each(|(s, f, ctx)| {
        let failure = string_to_datetime_formatted(&Utc, s, f).unwrap_err();
        let expected = format!("Execution error: Error parsing timestamp from '{s}' using format '{f}': {ctx}");
        assert_eq!(failure.to_string(), expected)
    });
}
963
/// Verifies the error text reported by `string_to_datetime_formatted` for
/// inputs that cannot be parsed with the supplied format.
///
/// NOTE(review): the cases here are the same as those in
/// `string_to_timestamp_formatted_invalid`; confirm whether this test was
/// meant to cover something different.
#[test]
fn string_to_timestamp_invalid_arguments() {
    let rfc2822_like = "Wed, 18 Feb 2015 23:16:09 GMT";

    // Each case is (input, format, trailing part of the expected error).
    let cases = [
        ("", "%Y%m%d %H%M%S", "premature end of input"),
        ("SS", "%c", "premature end of input"),
        (rfc2822_like, "", "trailing input"),
        (rfc2822_like, "%XX", "input contains invalid characters"),
        (
            rfc2822_like,
            "%Y%m%d %H%M%S",
            "input contains invalid characters",
        ),
    ];

    for &(s, f, ctx) in &cases {
        let parse_result = string_to_datetime_formatted(&Utc, s, f);
        let actual = parse_result.unwrap_err().to_string();
        let expected = format!("Execution error: Error parsing timestamp from '{s}' using format '{f}': {ctx}");
        assert_eq!(actual, expected)
    }
}
991
/// Checks how the `to_timestamp*` UDFs treat the timezone of their input:
///
/// * timestamp arrays that carry a timezone must yield a `Timestamp` type
///   with a timezone, both in the declared return type and in the array
///   actually produced;
/// * timezone-less timestamp arrays and a plain `Int64` array must yield a
///   `Timestamp` type without a timezone.
#[test]
fn test_tz() {
    /// Runs every UDF in `udfs` against every single-row array in `inputs`
    /// and asserts that the declared return type and the produced array are
    /// both `Timestamp` types whose timezone is present iff `expect_tz`.
    fn assert_timezone_presence(
        udfs: &[Box<dyn ScalarUDFImpl>],
        inputs: &[ColumnarValue],
        expect_tz: bool,
    ) {
        for udf in udfs {
            for input in inputs {
                let rt = udf.return_type(&[input.data_type()]).unwrap();
                assert!(
                    matches!(&rt, Timestamp(_, tz) if tz.is_some() == expect_tz),
                    "{}: unexpected return type {rt:?} (expect_tz = {expect_tz})",
                    udf.name()
                );

                let args = datafusion_expr::ScalarFunctionArgs {
                    args: vec![input.clone()],
                    arg_fields: vec![Field::new("arg", input.data_type(), true).into()],
                    // Every input array built by this test holds exactly one row.
                    number_rows: 1,
                    return_field: Field::new("f", rt, true).into(),
                    config_options: Arc::new(ConfigOptions::default()),
                };
                let res = udf
                    .invoke_with_args(args)
                    .expect("that to_timestamp parsed values without error");
                let ColumnarValue::Array(produced) = res else {
                    panic!("Expected a columnar array")
                };

                let ty = produced.data_type();
                assert!(
                    matches!(ty, Timestamp(_, tz) if tz.is_some() == expect_tz),
                    "{}: unexpected result type {ty:?} (expect_tz = {expect_tz})",
                    udf.name()
                );
            }
        }
    }

    // One UDF per output precision, plus the default `to_timestamp`.
    let udfs: Vec<Box<dyn ScalarUDFImpl>> = vec![
        Box::new(ToTimestampFunc::new()),
        Box::new(ToTimestampSecondsFunc::new()),
        Box::new(ToTimestampMillisFunc::new()),
        Box::new(ToTimestampMicrosFunc::new()),
        Box::new(ToTimestampNanosFunc::new()),
    ];

    // Case 1: inputs that carry a timezone.
    let mut nanos_builder = TimestampNanosecondArray::builder(2);
    let mut millis_builder = TimestampMillisecondArray::builder(2);
    let mut micros_builder = TimestampMicrosecondArray::builder(2);
    let mut sec_builder = TimestampSecondArray::builder(2);

    nanos_builder.append_value(1599572549190850000);
    millis_builder.append_value(1599572549190);
    micros_builder.append_value(1599572549190850);
    sec_builder.append_value(1599572549);

    let nanos_timestamps =
        Arc::new(nanos_builder.finish().with_timezone("UTC")) as ArrayRef;
    let millis_timestamps =
        Arc::new(millis_builder.finish().with_timezone("UTC")) as ArrayRef;
    let micros_timestamps =
        Arc::new(micros_builder.finish().with_timezone("UTC")) as ArrayRef;
    let sec_timestamps =
        Arc::new(sec_builder.finish().with_timezone("UTC")) as ArrayRef;

    let arrays = [
        ColumnarValue::Array(Arc::clone(&nanos_timestamps)),
        ColumnarValue::Array(Arc::clone(&millis_timestamps)),
        ColumnarValue::Array(Arc::clone(&micros_timestamps)),
        ColumnarValue::Array(Arc::clone(&sec_timestamps)),
    ];

    assert_timezone_presence(&udfs, &arrays, true);

    // Case 2: timezone-less timestamp inputs plus a plain Int64 input.
    let mut nanos_builder = TimestampNanosecondArray::builder(2);
    let mut millis_builder = TimestampMillisecondArray::builder(2);
    let mut micros_builder = TimestampMicrosecondArray::builder(2);
    let mut sec_builder = TimestampSecondArray::builder(2);
    let mut i64_builder = Int64Array::builder(2);

    nanos_builder.append_value(1599572549190850000);
    millis_builder.append_value(1599572549190);
    micros_builder.append_value(1599572549190850);
    sec_builder.append_value(1599572549);
    i64_builder.append_value(1599572549);

    let nanos_timestamps = Arc::new(nanos_builder.finish()) as ArrayRef;
    let millis_timestamps = Arc::new(millis_builder.finish()) as ArrayRef;
    let micros_timestamps = Arc::new(micros_builder.finish()) as ArrayRef;
    let sec_timestamps = Arc::new(sec_builder.finish()) as ArrayRef;
    let i64_timestamps = Arc::new(i64_builder.finish()) as ArrayRef;

    let arrays = [
        ColumnarValue::Array(Arc::clone(&nanos_timestamps)),
        ColumnarValue::Array(Arc::clone(&millis_timestamps)),
        ColumnarValue::Array(Arc::clone(&micros_timestamps)),
        ColumnarValue::Array(Arc::clone(&sec_timestamps)),
        ColumnarValue::Array(Arc::clone(&i64_timestamps)),
    ];

    assert_timezone_presence(&udfs, &arrays, false);
}
1102
/// Exercises argument handling of every `to_timestamp*` implementation:
///
/// * string format arguments given as `Utf8` or `LargeUtf8` scalars are
///   accepted and the single input row parses to the expected value in the
///   function's time unit, with no timezone on the result type;
/// * `Int32` scalars passed as format arguments are rejected with an
///   "Unsupported data type Int32 for function" error;
/// * an `Int64` array passed as a format argument is rejected with an
///   "Unsupported data type" error.
#[test]
fn test_to_timestamp_arg_validation() {
    let mut date_string_builder = StringBuilder::with_capacity(2, 1024);
    date_string_builder.append_value("2020-09-08T13:42:29.19085Z");

    let data = date_string_builder.finish();

    // Each implementation paired with the time unit of the array it returns.
    let funcs: Vec<(ScalarFunctionImplementation, TimeUnit)> = vec![
        (Arc::new(to_timestamp), Nanosecond),
        (Arc::new(to_timestamp_micros), Microsecond),
        (Arc::new(to_timestamp_millis), Millisecond),
        (Arc::new(to_timestamp_nanos), Nanosecond),
        (Arc::new(to_timestamp_seconds), Second),
    ];

    let mut nanos_builder = TimestampNanosecondArray::builder(2);
    let mut millis_builder = TimestampMillisecondArray::builder(2);
    let mut micros_builder = TimestampMicrosecondArray::builder(2);
    let mut sec_builder = TimestampSecondArray::builder(2);

    // The same instant as `data`, expressed in each time unit.
    nanos_builder.append_value(1599572549190850000);
    millis_builder.append_value(1599572549190);
    micros_builder.append_value(1599572549190850);
    sec_builder.append_value(1599572549);

    let nanos_expected_timestamps = &nanos_builder.finish() as &dyn Array;
    let millis_expected_timestamps = &millis_builder.finish() as &dyn Array;
    let micros_expected_timestamps = &micros_builder.finish() as &dyn Array;
    let sec_expected_timestamps = &sec_builder.finish() as &dyn Array;

    // Tuple enum variants double as constructor functions, which lets the
    // Utf8 and LargeUtf8 scenarios share one code path.
    let string_scalar_ctors: [fn(Option<String>) -> ScalarValue; 2] =
        [ScalarValue::Utf8, ScalarValue::LargeUtf8];

    for (func, time_unit) in funcs {
        let expected_timestamps = match time_unit {
            Nanosecond => nanos_expected_timestamps,
            Millisecond => millis_expected_timestamps,
            Microsecond => micros_expected_timestamps,
            Second => sec_expected_timestamps,
        };

        // Valid: string-typed format arguments (Utf8, then LargeUtf8).
        for make_scalar in string_scalar_ctors {
            let format_arg =
                |fmt: &str| ColumnarValue::Scalar(make_scalar(Some(fmt.to_string())));
            let string_array = [
                ColumnarValue::Array(Arc::new(data.clone()) as ArrayRef),
                format_arg("%s"),
                format_arg("%c"),
                format_arg("%+"),
            ];
            let parsed_timestamps = func(&string_array)
                .expect("that to_timestamp with format args parsed values without error");
            let ColumnarValue::Array(parsed_array) = parsed_timestamps else {
                panic!("Expected a columnar array")
            };
            assert_eq!(parsed_array.len(), 1);
            assert!(matches!(parsed_array.data_type(), Timestamp(_, None)));
            assert_eq!(expected_timestamps, parsed_array.as_ref());
        }

        // Invalid: integer scalars in the format positions.
        let string_array = [
            ColumnarValue::Array(Arc::new(data.clone()) as ArrayRef),
            ColumnarValue::Scalar(ScalarValue::Int32(Some(1))),
            ColumnarValue::Scalar(ScalarValue::Int32(Some(2))),
            ColumnarValue::Scalar(ScalarValue::Int32(Some(3))),
        ];

        let expected = "Unsupported data type Int32 for function".to_string();
        let actual = func(&string_array).unwrap_err().to_string();
        assert_contains!(actual, expected);

        // Invalid: an integer array in the format position.
        let string_array = [
            ColumnarValue::Array(Arc::new(data.clone()) as ArrayRef),
            ColumnarValue::Array(Arc::new(PrimitiveArray::<Int64Type>::new(
                vec![1i64].into(),
                None,
            )) as ArrayRef),
        ];

        let expected = "Unsupported data type".to_string();
        let actual = func(&string_array).unwrap_err().to_string();
        assert_contains!(actual, expected);
    }
}
1220}