1use std::any::Any;
19use std::sync::Arc;
20
21use crate::datetime::common::*;
22use arrow::datatypes::DataType::*;
23use arrow::datatypes::TimeUnit::{Microsecond, Millisecond, Nanosecond, Second};
24use arrow::datatypes::{
25 ArrowTimestampType, DataType, TimeUnit, TimestampMicrosecondType,
26 TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType,
27};
28use datafusion_common::{exec_err, Result, ScalarType, ScalarValue};
29use datafusion_expr::{
30 ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
31};
32use datafusion_macros::user_doc;
33
34#[user_doc(
35 doc_section(label = "Time and Date Functions"),
36 description = r#"
37Converts a value to a timestamp (`YYYY-MM-DDT00:00:00Z`). Supports strings, integer, unsigned integer, and double types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono formats] are provided. Integers, unsigned integers, and doubles are interpreted as seconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp.
38
39Note: `to_timestamp` returns `Timestamp(Nanosecond)`. The supported range for integer input is between `-9223372037` and `9223372036`. Supported range for string input is between `1677-09-21T00:12:44.0` and `2262-04-11T23:47:16.0`. Please use `to_timestamp_seconds` for the input outside of supported bounds.
40"#,
41 syntax_example = "to_timestamp(expression[, ..., format_n])",
42 sql_example = r#"```sql
43> select to_timestamp('2023-01-31T09:26:56.123456789-05:00');
44+-----------------------------------------------------------+
45| to_timestamp(Utf8("2023-01-31T09:26:56.123456789-05:00")) |
46+-----------------------------------------------------------+
47| 2023-01-31T14:26:56.123456789 |
48+-----------------------------------------------------------+
49> select to_timestamp('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y');
50+--------------------------------------------------------------------------------------------------------+
51| to_timestamp(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) |
52+--------------------------------------------------------------------------------------------------------+
53| 2023-05-17T03:59:00.123456789 |
54+--------------------------------------------------------------------------------------------------------+
55```
56Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs)
57"#,
58 argument(
59 name = "expression",
60 description = "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators."
61 ),
62 argument(
63 name = "format_n",
64 description = "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned."
65 )
66)]
67#[derive(Debug)]
68pub struct ToTimestampFunc {
69 signature: Signature,
70}
71
72#[user_doc(
73 doc_section(label = "Time and Date Functions"),
74 description = "Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000Z`). Supports strings, integer, and unsigned integer types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)s are provided. Integers and unsigned integers are interpreted as seconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp.",
75 syntax_example = "to_timestamp_seconds(expression[, ..., format_n])",
76 sql_example = r#"```sql
77> select to_timestamp_seconds('2023-01-31T09:26:56.123456789-05:00');
78+-------------------------------------------------------------------+
79| to_timestamp_seconds(Utf8("2023-01-31T09:26:56.123456789-05:00")) |
80+-------------------------------------------------------------------+
81| 2023-01-31T14:26:56 |
82+-------------------------------------------------------------------+
83> select to_timestamp_seconds('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y');
84+----------------------------------------------------------------------------------------------------------------+
85| to_timestamp_seconds(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) |
86+----------------------------------------------------------------------------------------------------------------+
87| 2023-05-17T03:59:00 |
88+----------------------------------------------------------------------------------------------------------------+
89```
90Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs)
91"#,
92 argument(
93 name = "expression",
94 description = "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators."
95 ),
96 argument(
97 name = "format_n",
98 description = "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned."
99 )
100)]
101#[derive(Debug)]
102pub struct ToTimestampSecondsFunc {
103 signature: Signature,
104}
105
/// Scalar UDF implementing the SQL function `to_timestamp_millis`.
///
/// The SQL-facing documentation is generated from the `user_doc` attribute below.
#[user_doc(
    doc_section(label = "Time and Date Functions"),
    description = "Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000Z`). Supports strings, integer, and unsigned integer types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono formats](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) are provided. Integers and unsigned integers are interpreted as milliseconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp.",
    syntax_example = "to_timestamp_millis(expression[, ..., format_n])",
    sql_example = r#"```sql
> select to_timestamp_millis('2023-01-31T09:26:56.123456789-05:00');
+------------------------------------------------------------------+
| to_timestamp_millis(Utf8("2023-01-31T09:26:56.123456789-05:00")) |
+------------------------------------------------------------------+
| 2023-01-31T14:26:56.123 |
+------------------------------------------------------------------+
> select to_timestamp_millis('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y');
+---------------------------------------------------------------------------------------------------------------+
| to_timestamp_millis(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) |
+---------------------------------------------------------------------------------------------------------------+
| 2023-05-17T03:59:00.123 |
+---------------------------------------------------------------------------------------------------------------+
```
Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs)
"#,
    argument(
        name = "expression",
        description = "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators."
    ),
    argument(
        name = "format_n",
        description = "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned."
    )
)]
#[derive(Debug)]
pub struct ToTimestampMillisFunc {
    // Argument signature returned by `ScalarUDFImpl::signature`.
    signature: Signature,
}
139
140#[user_doc(
141 doc_section(label = "Time and Date Functions"),
142 description = "Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000000Z`). Supports strings, integer, and unsigned integer types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)s are provided. Integers and unsigned integers are interpreted as microseconds since the unix epoch (`1970-01-01T00:00:00Z`) Returns the corresponding timestamp.",
143 syntax_example = "to_timestamp_micros(expression[, ..., format_n])",
144 sql_example = r#"```sql
145> select to_timestamp_micros('2023-01-31T09:26:56.123456789-05:00');
146+------------------------------------------------------------------+
147| to_timestamp_micros(Utf8("2023-01-31T09:26:56.123456789-05:00")) |
148+------------------------------------------------------------------+
149| 2023-01-31T14:26:56.123456 |
150+------------------------------------------------------------------+
151> select to_timestamp_micros('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y');
152+---------------------------------------------------------------------------------------------------------------+
153| to_timestamp_micros(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) |
154+---------------------------------------------------------------------------------------------------------------+
155| 2023-05-17T03:59:00.123456 |
156+---------------------------------------------------------------------------------------------------------------+
157```
158Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs)
159"#,
160 argument(
161 name = "expression",
162 description = "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators."
163 ),
164 argument(
165 name = "format_n",
166 description = "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned."
167 )
168)]
169#[derive(Debug)]
170pub struct ToTimestampMicrosFunc {
171 signature: Signature,
172}
173
174#[user_doc(
175 doc_section(label = "Time and Date Functions"),
176 description = "Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000000000Z`). Supports strings, integer, and unsigned integer types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)s are provided. Integers and unsigned integers are interpreted as nanoseconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp.",
177 syntax_example = "to_timestamp_nanos(expression[, ..., format_n])",
178 sql_example = r#"```sql
179> select to_timestamp_nanos('2023-01-31T09:26:56.123456789-05:00');
180+-----------------------------------------------------------------+
181| to_timestamp_nanos(Utf8("2023-01-31T09:26:56.123456789-05:00")) |
182+-----------------------------------------------------------------+
183| 2023-01-31T14:26:56.123456789 |
184+-----------------------------------------------------------------+
185> select to_timestamp_nanos('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y');
186+--------------------------------------------------------------------------------------------------------------+
187| to_timestamp_nanos(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) |
188+--------------------------------------------------------------------------------------------------------------+
189| 2023-05-17T03:59:00.123456789 |
190+---------------------------------------------------------------------------------------------------------------+
191```
192Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs)
193"#,
194 argument(
195 name = "expression",
196 description = "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators."
197 ),
198 argument(
199 name = "format_n",
200 description = "Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned."
201 )
202)]
203#[derive(Debug)]
204pub struct ToTimestampNanosFunc {
205 signature: Signature,
206}
207
208impl Default for ToTimestampFunc {
209 fn default() -> Self {
210 Self::new()
211 }
212}
213
214impl ToTimestampFunc {
215 pub fn new() -> Self {
216 Self {
217 signature: Signature::variadic_any(Volatility::Immutable),
218 }
219 }
220}
221
222impl Default for ToTimestampSecondsFunc {
223 fn default() -> Self {
224 Self::new()
225 }
226}
227
228impl ToTimestampSecondsFunc {
229 pub fn new() -> Self {
230 Self {
231 signature: Signature::variadic_any(Volatility::Immutable),
232 }
233 }
234}
235
236impl Default for ToTimestampMillisFunc {
237 fn default() -> Self {
238 Self::new()
239 }
240}
241
242impl ToTimestampMillisFunc {
243 pub fn new() -> Self {
244 Self {
245 signature: Signature::variadic_any(Volatility::Immutable),
246 }
247 }
248}
249
250impl Default for ToTimestampMicrosFunc {
251 fn default() -> Self {
252 Self::new()
253 }
254}
255
256impl ToTimestampMicrosFunc {
257 pub fn new() -> Self {
258 Self {
259 signature: Signature::variadic_any(Volatility::Immutable),
260 }
261 }
262}
263
264impl Default for ToTimestampNanosFunc {
265 fn default() -> Self {
266 Self::new()
267 }
268}
269
270impl ToTimestampNanosFunc {
271 pub fn new() -> Self {
272 Self {
273 signature: Signature::variadic_any(Volatility::Immutable),
274 }
275 }
276}
277
278impl ScalarUDFImpl for ToTimestampFunc {
285 fn as_any(&self) -> &dyn Any {
286 self
287 }
288
289 fn name(&self) -> &str {
290 "to_timestamp"
291 }
292
293 fn signature(&self) -> &Signature {
294 &self.signature
295 }
296
297 fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
298 Ok(return_type_for(&arg_types[0], Nanosecond))
299 }
300
301 fn invoke_with_args(
302 &self,
303 args: datafusion_expr::ScalarFunctionArgs,
304 ) -> Result<ColumnarValue> {
305 let args = args.args;
306 if args.is_empty() {
307 return exec_err!(
308 "to_timestamp function requires 1 or more arguments, got {}",
309 args.len()
310 );
311 }
312
313 if args.len() > 1 {
315 validate_data_types(&args, "to_timestamp")?;
316 }
317
318 match args[0].data_type() {
319 Int32 | Int64 => args[0]
320 .cast_to(&Timestamp(Second, None), None)?
321 .cast_to(&Timestamp(Nanosecond, None), None),
322 Null | Float64 | Timestamp(_, None) => {
323 args[0].cast_to(&Timestamp(Nanosecond, None), None)
324 }
325 Timestamp(_, Some(tz)) => {
326 args[0].cast_to(&Timestamp(Nanosecond, Some(tz)), None)
327 }
328 Utf8View | LargeUtf8 | Utf8 => {
329 to_timestamp_impl::<TimestampNanosecondType>(&args, "to_timestamp")
330 }
331 Decimal128(_, _) => {
332 match &args[0] {
333 ColumnarValue::Scalar(ScalarValue::Decimal128(
334 Some(value),
335 _,
336 scale,
337 )) => {
338 let scale_factor = 10_i128.pow(*scale as u32);
340 let seconds = value / scale_factor;
341 let fraction = value % scale_factor;
342
343 let nanos = (fraction * 1_000_000_000) / scale_factor;
344
345 let timestamp_nanos = seconds * 1_000_000_000 + nanos;
346
347 Ok(ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(
348 Some(timestamp_nanos as i64),
349 None,
350 )))
351 }
352 _ => exec_err!("Invalid decimal value"),
353 }
354 }
355 other => {
356 exec_err!(
357 "Unsupported data type {:?} for function to_timestamp",
358 other
359 )
360 }
361 }
362 }
363 fn documentation(&self) -> Option<&Documentation> {
364 self.doc()
365 }
366}
367
368impl ScalarUDFImpl for ToTimestampSecondsFunc {
369 fn as_any(&self) -> &dyn Any {
370 self
371 }
372
373 fn name(&self) -> &str {
374 "to_timestamp_seconds"
375 }
376
377 fn signature(&self) -> &Signature {
378 &self.signature
379 }
380
381 fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
382 Ok(return_type_for(&arg_types[0], Second))
383 }
384
385 fn invoke_with_args(
386 &self,
387 args: datafusion_expr::ScalarFunctionArgs,
388 ) -> Result<ColumnarValue> {
389 let args = args.args;
390 if args.is_empty() {
391 return exec_err!(
392 "to_timestamp_seconds function requires 1 or more arguments, got {}",
393 args.len()
394 );
395 }
396
397 if args.len() > 1 {
399 validate_data_types(&args, "to_timestamp")?;
400 }
401
402 match args[0].data_type() {
403 Null | Int32 | Int64 | Timestamp(_, None) | Decimal128(_, _) => {
404 args[0].cast_to(&Timestamp(Second, None), None)
405 }
406 Timestamp(_, Some(tz)) => args[0].cast_to(&Timestamp(Second, Some(tz)), None),
407 Utf8View | LargeUtf8 | Utf8 => {
408 to_timestamp_impl::<TimestampSecondType>(&args, "to_timestamp_seconds")
409 }
410 other => {
411 exec_err!(
412 "Unsupported data type {:?} for function to_timestamp_seconds",
413 other
414 )
415 }
416 }
417 }
418 fn documentation(&self) -> Option<&Documentation> {
419 self.doc()
420 }
421}
422
423impl ScalarUDFImpl for ToTimestampMillisFunc {
424 fn as_any(&self) -> &dyn Any {
425 self
426 }
427
428 fn name(&self) -> &str {
429 "to_timestamp_millis"
430 }
431
432 fn signature(&self) -> &Signature {
433 &self.signature
434 }
435
436 fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
437 Ok(return_type_for(&arg_types[0], Millisecond))
438 }
439
440 fn invoke_with_args(
441 &self,
442 args: datafusion_expr::ScalarFunctionArgs,
443 ) -> Result<ColumnarValue> {
444 let args = args.args;
445 if args.is_empty() {
446 return exec_err!(
447 "to_timestamp_millis function requires 1 or more arguments, got {}",
448 args.len()
449 );
450 }
451
452 if args.len() > 1 {
454 validate_data_types(&args, "to_timestamp")?;
455 }
456
457 match args[0].data_type() {
458 Null | Int32 | Int64 | Timestamp(_, None) => {
459 args[0].cast_to(&Timestamp(Millisecond, None), None)
460 }
461 Timestamp(_, Some(tz)) => {
462 args[0].cast_to(&Timestamp(Millisecond, Some(tz)), None)
463 }
464 Utf8View | LargeUtf8 | Utf8 => to_timestamp_impl::<TimestampMillisecondType>(
465 &args,
466 "to_timestamp_millis",
467 ),
468 other => {
469 exec_err!(
470 "Unsupported data type {:?} for function to_timestamp_millis",
471 other
472 )
473 }
474 }
475 }
476 fn documentation(&self) -> Option<&Documentation> {
477 self.doc()
478 }
479}
480
481impl ScalarUDFImpl for ToTimestampMicrosFunc {
482 fn as_any(&self) -> &dyn Any {
483 self
484 }
485
486 fn name(&self) -> &str {
487 "to_timestamp_micros"
488 }
489
490 fn signature(&self) -> &Signature {
491 &self.signature
492 }
493
494 fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
495 Ok(return_type_for(&arg_types[0], Microsecond))
496 }
497
498 fn invoke_with_args(
499 &self,
500 args: datafusion_expr::ScalarFunctionArgs,
501 ) -> Result<ColumnarValue> {
502 let args = args.args;
503 if args.is_empty() {
504 return exec_err!(
505 "to_timestamp_micros function requires 1 or more arguments, got {}",
506 args.len()
507 );
508 }
509
510 if args.len() > 1 {
512 validate_data_types(&args, "to_timestamp")?;
513 }
514
515 match args[0].data_type() {
516 Null | Int32 | Int64 | Timestamp(_, None) => {
517 args[0].cast_to(&Timestamp(Microsecond, None), None)
518 }
519 Timestamp(_, Some(tz)) => {
520 args[0].cast_to(&Timestamp(Microsecond, Some(tz)), None)
521 }
522 Utf8View | LargeUtf8 | Utf8 => to_timestamp_impl::<TimestampMicrosecondType>(
523 &args,
524 "to_timestamp_micros",
525 ),
526 other => {
527 exec_err!(
528 "Unsupported data type {:?} for function to_timestamp_micros",
529 other
530 )
531 }
532 }
533 }
534 fn documentation(&self) -> Option<&Documentation> {
535 self.doc()
536 }
537}
538
539impl ScalarUDFImpl for ToTimestampNanosFunc {
540 fn as_any(&self) -> &dyn Any {
541 self
542 }
543
544 fn name(&self) -> &str {
545 "to_timestamp_nanos"
546 }
547
548 fn signature(&self) -> &Signature {
549 &self.signature
550 }
551
552 fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
553 Ok(return_type_for(&arg_types[0], Nanosecond))
554 }
555
556 fn invoke_with_args(
557 &self,
558 args: datafusion_expr::ScalarFunctionArgs,
559 ) -> Result<ColumnarValue> {
560 let args = args.args;
561 if args.is_empty() {
562 return exec_err!(
563 "to_timestamp_nanos function requires 1 or more arguments, got {}",
564 args.len()
565 );
566 }
567
568 if args.len() > 1 {
570 validate_data_types(&args, "to_timestamp")?;
571 }
572
573 match args[0].data_type() {
574 Null | Int32 | Int64 | Timestamp(_, None) => {
575 args[0].cast_to(&Timestamp(Nanosecond, None), None)
576 }
577 Timestamp(_, Some(tz)) => {
578 args[0].cast_to(&Timestamp(Nanosecond, Some(tz)), None)
579 }
580 Utf8View | LargeUtf8 | Utf8 => {
581 to_timestamp_impl::<TimestampNanosecondType>(&args, "to_timestamp_nanos")
582 }
583 other => {
584 exec_err!(
585 "Unsupported data type {:?} for function to_timestamp_nanos",
586 other
587 )
588 }
589 }
590 }
591 fn documentation(&self) -> Option<&Documentation> {
592 self.doc()
593 }
594}
595
596fn return_type_for(arg: &DataType, unit: TimeUnit) -> DataType {
599 match arg {
600 Timestamp(_, Some(tz)) => Timestamp(unit, Some(Arc::clone(tz))),
601 _ => Timestamp(unit, None),
602 }
603}
604
605fn to_timestamp_impl<T: ArrowTimestampType + ScalarType<i64>>(
606 args: &[ColumnarValue],
607 name: &str,
608) -> Result<ColumnarValue> {
609 let factor = match T::UNIT {
610 Second => 1_000_000_000,
611 Millisecond => 1_000_000,
612 Microsecond => 1_000,
613 Nanosecond => 1,
614 };
615
616 match args.len() {
617 1 => handle::<T, _, T>(
618 args,
619 |s| string_to_timestamp_nanos_shim(s).map(|n| n / factor),
620 name,
621 ),
622 n if n >= 2 => handle_multiple::<T, _, T, _>(
623 args,
624 string_to_timestamp_nanos_formatted,
625 |n| n / factor,
626 name,
627 ),
628 _ => exec_err!("Unsupported 0 argument count for function {name}"),
629 }
630}
631
632#[cfg(test)]
633mod tests {
634 use std::sync::Arc;
635
636 use arrow::array::types::Int64Type;
637 use arrow::array::{
638 Array, PrimitiveArray, TimestampMicrosecondArray, TimestampMillisecondArray,
639 TimestampNanosecondArray, TimestampSecondArray,
640 };
641 use arrow::array::{ArrayRef, Int64Array, StringBuilder};
642 use arrow::datatypes::{Field, TimeUnit};
643 use chrono::Utc;
644 use datafusion_common::{assert_contains, DataFusionError, ScalarValue};
645 use datafusion_expr::ScalarFunctionImplementation;
646
647 use super::*;
648
649 fn to_timestamp(args: &[ColumnarValue]) -> Result<ColumnarValue> {
650 to_timestamp_impl::<TimestampNanosecondType>(args, "to_timestamp")
651 }
652
653 fn to_timestamp_millis(args: &[ColumnarValue]) -> Result<ColumnarValue> {
655 to_timestamp_impl::<TimestampMillisecondType>(args, "to_timestamp_millis")
656 }
657
658 fn to_timestamp_micros(args: &[ColumnarValue]) -> Result<ColumnarValue> {
660 to_timestamp_impl::<TimestampMicrosecondType>(args, "to_timestamp_micros")
661 }
662
663 fn to_timestamp_nanos(args: &[ColumnarValue]) -> Result<ColumnarValue> {
665 to_timestamp_impl::<TimestampNanosecondType>(args, "to_timestamp_nanos")
666 }
667
668 fn to_timestamp_seconds(args: &[ColumnarValue]) -> Result<ColumnarValue> {
670 to_timestamp_impl::<TimestampSecondType>(args, "to_timestamp_seconds")
671 }
672
673 #[test]
674 fn to_timestamp_arrays_and_nulls() -> Result<()> {
675 let mut string_builder = StringBuilder::with_capacity(2, 1024);
678 let mut ts_builder = TimestampNanosecondArray::builder(2);
679
680 string_builder.append_value("2020-09-08T13:42:29.190855");
681 ts_builder.append_value(1599572549190855000);
682
683 string_builder.append_null();
684 ts_builder.append_null();
685 let expected_timestamps = &ts_builder.finish() as &dyn Array;
686
687 let string_array =
688 ColumnarValue::Array(Arc::new(string_builder.finish()) as ArrayRef);
689 let parsed_timestamps = to_timestamp(&[string_array])
690 .expect("that to_timestamp parsed values without error");
691 if let ColumnarValue::Array(parsed_array) = parsed_timestamps {
692 assert_eq!(parsed_array.len(), 2);
693 assert_eq!(expected_timestamps, parsed_array.as_ref());
694 } else {
695 panic!("Expected a columnar array")
696 }
697 Ok(())
698 }
699
700 #[test]
701 fn to_timestamp_with_formats_arrays_and_nulls() -> Result<()> {
702 let mut date_string_builder = StringBuilder::with_capacity(2, 1024);
705 let mut format1_builder = StringBuilder::with_capacity(2, 1024);
706 let mut format2_builder = StringBuilder::with_capacity(2, 1024);
707 let mut format3_builder = StringBuilder::with_capacity(2, 1024);
708 let mut ts_builder = TimestampNanosecondArray::builder(2);
709
710 date_string_builder.append_null();
711 format1_builder.append_null();
712 format2_builder.append_null();
713 format3_builder.append_null();
714 ts_builder.append_null();
715
716 date_string_builder.append_value("2020-09-08T13:42:29.19085Z");
717 format1_builder.append_value("%s");
718 format2_builder.append_value("%c");
719 format3_builder.append_value("%+");
720 ts_builder.append_value(1599572549190850000);
721
722 let expected_timestamps = &ts_builder.finish() as &dyn Array;
723
724 let string_array = [
725 ColumnarValue::Array(Arc::new(date_string_builder.finish()) as ArrayRef),
726 ColumnarValue::Array(Arc::new(format1_builder.finish()) as ArrayRef),
727 ColumnarValue::Array(Arc::new(format2_builder.finish()) as ArrayRef),
728 ColumnarValue::Array(Arc::new(format3_builder.finish()) as ArrayRef),
729 ];
730 let parsed_timestamps = to_timestamp(&string_array)
731 .expect("that to_timestamp with format args parsed values without error");
732 if let ColumnarValue::Array(parsed_array) = parsed_timestamps {
733 assert_eq!(parsed_array.len(), 2);
734 assert_eq!(expected_timestamps, parsed_array.as_ref());
735 } else {
736 panic!("Expected a columnar array")
737 }
738 Ok(())
739 }
740
741 #[test]
742 fn to_timestamp_invalid_input_type() -> Result<()> {
743 let mut builder = Int64Array::builder(1);
747 builder.append_value(1);
748 let int64array = ColumnarValue::Array(Arc::new(builder.finish()));
749
750 let expected_err =
751 "Execution error: Unsupported data type Int64 for function to_timestamp";
752 match to_timestamp(&[int64array]) {
753 Ok(_) => panic!("Expected error but got success"),
754 Err(e) => {
755 assert!(
756 e.to_string().contains(expected_err),
757 "Can not find expected error '{expected_err}'. Actual error '{e}'"
758 );
759 }
760 }
761 Ok(())
762 }
763
764 #[test]
765 fn to_timestamp_with_formats_invalid_input_type() -> Result<()> {
766 let mut builder = Int64Array::builder(1);
770 builder.append_value(1);
771 let int64array = [
772 ColumnarValue::Array(Arc::new(builder.finish())),
773 ColumnarValue::Array(Arc::new(builder.finish())),
774 ];
775
776 let expected_err =
777 "Execution error: Unsupported data type Int64 for function to_timestamp";
778 match to_timestamp(&int64array) {
779 Ok(_) => panic!("Expected error but got success"),
780 Err(e) => {
781 assert!(
782 e.to_string().contains(expected_err),
783 "Can not find expected error '{expected_err}'. Actual error '{e}'"
784 );
785 }
786 }
787 Ok(())
788 }
789
790 #[test]
791 fn to_timestamp_with_unparseable_data() -> Result<()> {
792 let mut date_string_builder = StringBuilder::with_capacity(2, 1024);
793
794 date_string_builder.append_null();
795
796 date_string_builder.append_value("2020-09-08 - 13:42:29.19085Z");
797
798 let string_array =
799 ColumnarValue::Array(Arc::new(date_string_builder.finish()) as ArrayRef);
800
801 let expected_err =
802 "Arrow error: Parser error: Error parsing timestamp from '2020-09-08 - 13:42:29.19085Z': error parsing time";
803 match to_timestamp(&[string_array]) {
804 Ok(_) => panic!("Expected error but got success"),
805 Err(e) => {
806 assert!(
807 e.to_string().contains(expected_err),
808 "Can not find expected error '{expected_err}'. Actual error '{e}'"
809 );
810 }
811 }
812 Ok(())
813 }
814
815 #[test]
816 fn to_timestamp_with_invalid_tz() -> Result<()> {
817 let mut date_string_builder = StringBuilder::with_capacity(2, 1024);
818
819 date_string_builder.append_null();
820
821 date_string_builder.append_value("2020-09-08T13:42:29ZZ");
822
823 let string_array =
824 ColumnarValue::Array(Arc::new(date_string_builder.finish()) as ArrayRef);
825
826 let expected_err =
827 "Arrow error: Parser error: Invalid timezone \"ZZ\": failed to parse timezone";
828 match to_timestamp(&[string_array]) {
829 Ok(_) => panic!("Expected error but got success"),
830 Err(e) => {
831 assert!(
832 e.to_string().contains(expected_err),
833 "Can not find expected error '{expected_err}'. Actual error '{e}'"
834 );
835 }
836 }
837 Ok(())
838 }
839
840 #[test]
841 fn to_timestamp_with_no_matching_formats() -> Result<()> {
842 let mut date_string_builder = StringBuilder::with_capacity(2, 1024);
843 let mut format1_builder = StringBuilder::with_capacity(2, 1024);
844 let mut format2_builder = StringBuilder::with_capacity(2, 1024);
845 let mut format3_builder = StringBuilder::with_capacity(2, 1024);
846
847 date_string_builder.append_null();
848 format1_builder.append_null();
849 format2_builder.append_null();
850 format3_builder.append_null();
851
852 date_string_builder.append_value("2020-09-08T13:42:29.19085Z");
853 format1_builder.append_value("%s");
854 format2_builder.append_value("%c");
855 format3_builder.append_value("%H:%M:%S");
856
857 let string_array = [
858 ColumnarValue::Array(Arc::new(date_string_builder.finish()) as ArrayRef),
859 ColumnarValue::Array(Arc::new(format1_builder.finish()) as ArrayRef),
860 ColumnarValue::Array(Arc::new(format2_builder.finish()) as ArrayRef),
861 ColumnarValue::Array(Arc::new(format3_builder.finish()) as ArrayRef),
862 ];
863
864 let expected_err =
865 "Execution error: Error parsing timestamp from '2020-09-08T13:42:29.19085Z' using format '%H:%M:%S': input contains invalid characters";
866 match to_timestamp(&string_array) {
867 Ok(_) => panic!("Expected error but got success"),
868 Err(e) => {
869 assert!(
870 e.to_string().contains(expected_err),
871 "Can not find expected error '{expected_err}'. Actual error '{e}'"
872 );
873 }
874 }
875 Ok(())
876 }
877
878 #[test]
879 fn string_to_timestamp_formatted() {
880 assert_eq!(
882 1599572549190855000,
883 parse_timestamp_formatted("2020-09-08T13:42:29.190855+00:00", "%+").unwrap()
884 );
885 assert_eq!(
886 1599572549190855000,
887 parse_timestamp_formatted("2020-09-08T13:42:29.190855Z", "%+").unwrap()
888 );
889 assert_eq!(
890 1599572549000000000,
891 parse_timestamp_formatted("2020-09-08T13:42:29Z", "%+").unwrap()
892 ); assert_eq!(
894 1599590549190855000,
895 parse_timestamp_formatted("2020-09-08T13:42:29.190855-05:00", "%+").unwrap()
896 );
897 assert_eq!(
898 1599590549000000000,
899 parse_timestamp_formatted("1599590549", "%s").unwrap()
900 );
901 assert_eq!(
902 1599572549000000000,
903 parse_timestamp_formatted("09-08-2020 13/42/29", "%m-%d-%Y %H/%M/%S")
904 .unwrap()
905 );
906 assert_eq!(
907 1642896000000000000,
908 parse_timestamp_formatted("2022-01-23", "%Y-%m-%d").unwrap()
909 );
910 }
911
912 fn parse_timestamp_formatted(s: &str, format: &str) -> Result<i64, DataFusionError> {
913 let result = string_to_timestamp_nanos_formatted(s, format);
914 if let Err(e) = &result {
915 eprintln!("Error parsing timestamp '{s}' using format '{format}': {e:?}");
916 }
917 result
918 }
919
920 #[test]
921 fn string_to_timestamp_formatted_invalid() {
922 let cases = [
924 ("", "%Y%m%d %H%M%S", "premature end of input"),
925 ("SS", "%c", "premature end of input"),
926 ("Wed, 18 Feb 2015 23:16:09 GMT", "", "trailing input"),
927 (
928 "Wed, 18 Feb 2015 23:16:09 GMT",
929 "%XX",
930 "input contains invalid characters",
931 ),
932 (
933 "Wed, 18 Feb 2015 23:16:09 GMT",
934 "%Y%m%d %H%M%S",
935 "input contains invalid characters",
936 ),
937 ];
938
939 for (s, f, ctx) in cases {
940 let expected = format!("Execution error: Error parsing timestamp from '{s}' using format '{f}': {ctx}");
941 let actual = string_to_datetime_formatted(&Utc, s, f)
942 .unwrap_err()
943 .to_string();
944 assert_eq!(actual, expected)
945 }
946 }
947
948 #[test]
949 fn string_to_timestamp_invalid_arguments() {
950 let cases = [
952 ("", "%Y%m%d %H%M%S", "premature end of input"),
953 ("SS", "%c", "premature end of input"),
954 ("Wed, 18 Feb 2015 23:16:09 GMT", "", "trailing input"),
955 (
956 "Wed, 18 Feb 2015 23:16:09 GMT",
957 "%XX",
958 "input contains invalid characters",
959 ),
960 (
961 "Wed, 18 Feb 2015 23:16:09 GMT",
962 "%Y%m%d %H%M%S",
963 "input contains invalid characters",
964 ),
965 ];
966
967 for (s, f, ctx) in cases {
968 let expected = format!("Execution error: Error parsing timestamp from '{s}' using format '{f}': {ctx}");
969 let actual = string_to_datetime_formatted(&Utc, s, f)
970 .unwrap_err()
971 .to_string();
972 assert_eq!(actual, expected)
973 }
974 }
975
976 #[test]
977 fn test_tz() {
978 let udfs: Vec<Box<dyn ScalarUDFImpl>> = vec![
979 Box::new(ToTimestampFunc::new()),
980 Box::new(ToTimestampSecondsFunc::new()),
981 Box::new(ToTimestampMillisFunc::new()),
982 Box::new(ToTimestampNanosFunc::new()),
983 Box::new(ToTimestampSecondsFunc::new()),
984 ];
985
986 let mut nanos_builder = TimestampNanosecondArray::builder(2);
987 let mut millis_builder = TimestampMillisecondArray::builder(2);
988 let mut micros_builder = TimestampMicrosecondArray::builder(2);
989 let mut sec_builder = TimestampSecondArray::builder(2);
990
991 nanos_builder.append_value(1599572549190850000);
992 millis_builder.append_value(1599572549190);
993 micros_builder.append_value(1599572549190850);
994 sec_builder.append_value(1599572549);
995
996 let nanos_timestamps =
997 Arc::new(nanos_builder.finish().with_timezone("UTC")) as ArrayRef;
998 let millis_timestamps =
999 Arc::new(millis_builder.finish().with_timezone("UTC")) as ArrayRef;
1000 let micros_timestamps =
1001 Arc::new(micros_builder.finish().with_timezone("UTC")) as ArrayRef;
1002 let sec_timestamps =
1003 Arc::new(sec_builder.finish().with_timezone("UTC")) as ArrayRef;
1004
1005 let arrays = &[
1006 ColumnarValue::Array(Arc::clone(&nanos_timestamps)),
1007 ColumnarValue::Array(Arc::clone(&millis_timestamps)),
1008 ColumnarValue::Array(Arc::clone(µs_timestamps)),
1009 ColumnarValue::Array(Arc::clone(&sec_timestamps)),
1010 ];
1011
1012 for udf in &udfs {
1013 for array in arrays {
1014 let rt = udf.return_type(&[array.data_type()]).unwrap();
1015 let arg_field = Field::new("arg", array.data_type().clone(), true).into();
1016 assert!(matches!(rt, Timestamp(_, Some(_))));
1017 let args = datafusion_expr::ScalarFunctionArgs {
1018 args: vec![array.clone()],
1019 arg_fields: vec![arg_field],
1020 number_rows: 4,
1021 return_field: Field::new("f", rt, true).into(),
1022 };
1023 let res = udf
1024 .invoke_with_args(args)
1025 .expect("that to_timestamp parsed values without error");
1026 let array = match res {
1027 ColumnarValue::Array(res) => res,
1028 _ => panic!("Expected a columnar array"),
1029 };
1030 let ty = array.data_type();
1031 assert!(matches!(ty, Timestamp(_, Some(_))));
1032 }
1033 }
1034
1035 let mut nanos_builder = TimestampNanosecondArray::builder(2);
1036 let mut millis_builder = TimestampMillisecondArray::builder(2);
1037 let mut micros_builder = TimestampMicrosecondArray::builder(2);
1038 let mut sec_builder = TimestampSecondArray::builder(2);
1039 let mut i64_builder = Int64Array::builder(2);
1040
1041 nanos_builder.append_value(1599572549190850000);
1042 millis_builder.append_value(1599572549190);
1043 micros_builder.append_value(1599572549190850);
1044 sec_builder.append_value(1599572549);
1045 i64_builder.append_value(1599572549);
1046
1047 let nanos_timestamps = Arc::new(nanos_builder.finish()) as ArrayRef;
1048 let millis_timestamps = Arc::new(millis_builder.finish()) as ArrayRef;
1049 let micros_timestamps = Arc::new(micros_builder.finish()) as ArrayRef;
1050 let sec_timestamps = Arc::new(sec_builder.finish()) as ArrayRef;
1051 let i64_timestamps = Arc::new(i64_builder.finish()) as ArrayRef;
1052
1053 let arrays = &[
1054 ColumnarValue::Array(Arc::clone(&nanos_timestamps)),
1055 ColumnarValue::Array(Arc::clone(&millis_timestamps)),
1056 ColumnarValue::Array(Arc::clone(µs_timestamps)),
1057 ColumnarValue::Array(Arc::clone(&sec_timestamps)),
1058 ColumnarValue::Array(Arc::clone(&i64_timestamps)),
1059 ];
1060
1061 for udf in &udfs {
1062 for array in arrays {
1063 let rt = udf.return_type(&[array.data_type()]).unwrap();
1064 assert!(matches!(rt, Timestamp(_, None)));
1065 let arg_field = Field::new("arg", array.data_type().clone(), true).into();
1066 let args = datafusion_expr::ScalarFunctionArgs {
1067 args: vec![array.clone()],
1068 arg_fields: vec![arg_field],
1069 number_rows: 5,
1070 return_field: Field::new("f", rt, true).into(),
1071 };
1072 let res = udf
1073 .invoke_with_args(args)
1074 .expect("that to_timestamp parsed values without error");
1075 let array = match res {
1076 ColumnarValue::Array(res) => res,
1077 _ => panic!("Expected a columnar array"),
1078 };
1079 let ty = array.data_type();
1080 assert!(matches!(ty, Timestamp(_, None)));
1081 }
1082 }
1083 }
1084
1085 #[test]
1086 fn test_to_timestamp_arg_validation() {
1087 let mut date_string_builder = StringBuilder::with_capacity(2, 1024);
1088 date_string_builder.append_value("2020-09-08T13:42:29.19085Z");
1089
1090 let data = date_string_builder.finish();
1091
1092 let funcs: Vec<(ScalarFunctionImplementation, TimeUnit)> = vec![
1093 (Arc::new(to_timestamp), Nanosecond),
1094 (Arc::new(to_timestamp_micros), Microsecond),
1095 (Arc::new(to_timestamp_millis), Millisecond),
1096 (Arc::new(to_timestamp_nanos), Nanosecond),
1097 (Arc::new(to_timestamp_seconds), Second),
1098 ];
1099
1100 let mut nanos_builder = TimestampNanosecondArray::builder(2);
1101 let mut millis_builder = TimestampMillisecondArray::builder(2);
1102 let mut micros_builder = TimestampMicrosecondArray::builder(2);
1103 let mut sec_builder = TimestampSecondArray::builder(2);
1104
1105 nanos_builder.append_value(1599572549190850000);
1106 millis_builder.append_value(1599572549190);
1107 micros_builder.append_value(1599572549190850);
1108 sec_builder.append_value(1599572549);
1109
1110 let nanos_expected_timestamps = &nanos_builder.finish() as &dyn Array;
1111 let millis_expected_timestamps = &millis_builder.finish() as &dyn Array;
1112 let micros_expected_timestamps = µs_builder.finish() as &dyn Array;
1113 let sec_expected_timestamps = &sec_builder.finish() as &dyn Array;
1114
1115 for (func, time_unit) in funcs {
1116 let string_array = [
1118 ColumnarValue::Array(Arc::new(data.clone()) as ArrayRef),
1119 ColumnarValue::Scalar(ScalarValue::Utf8(Some("%s".to_string()))),
1120 ColumnarValue::Scalar(ScalarValue::Utf8(Some("%c".to_string()))),
1121 ColumnarValue::Scalar(ScalarValue::Utf8(Some("%+".to_string()))),
1122 ];
1123 let parsed_timestamps = func(&string_array)
1124 .expect("that to_timestamp with format args parsed values without error");
1125 if let ColumnarValue::Array(parsed_array) = parsed_timestamps {
1126 assert_eq!(parsed_array.len(), 1);
1127 match time_unit {
1128 Nanosecond => {
1129 assert_eq!(nanos_expected_timestamps, parsed_array.as_ref())
1130 }
1131 Millisecond => {
1132 assert_eq!(millis_expected_timestamps, parsed_array.as_ref())
1133 }
1134 Microsecond => {
1135 assert_eq!(micros_expected_timestamps, parsed_array.as_ref())
1136 }
1137 Second => {
1138 assert_eq!(sec_expected_timestamps, parsed_array.as_ref())
1139 }
1140 };
1141 } else {
1142 panic!("Expected a columnar array")
1143 }
1144
1145 let string_array = [
1147 ColumnarValue::Array(Arc::new(data.clone()) as ArrayRef),
1148 ColumnarValue::Scalar(ScalarValue::LargeUtf8(Some("%s".to_string()))),
1149 ColumnarValue::Scalar(ScalarValue::LargeUtf8(Some("%c".to_string()))),
1150 ColumnarValue::Scalar(ScalarValue::LargeUtf8(Some("%+".to_string()))),
1151 ];
1152 let parsed_timestamps = func(&string_array)
1153 .expect("that to_timestamp with format args parsed values without error");
1154 if let ColumnarValue::Array(parsed_array) = parsed_timestamps {
1155 assert_eq!(parsed_array.len(), 1);
1156 assert!(matches!(parsed_array.data_type(), Timestamp(_, None)));
1157
1158 match time_unit {
1159 Nanosecond => {
1160 assert_eq!(nanos_expected_timestamps, parsed_array.as_ref())
1161 }
1162 Millisecond => {
1163 assert_eq!(millis_expected_timestamps, parsed_array.as_ref())
1164 }
1165 Microsecond => {
1166 assert_eq!(micros_expected_timestamps, parsed_array.as_ref())
1167 }
1168 Second => {
1169 assert_eq!(sec_expected_timestamps, parsed_array.as_ref())
1170 }
1171 };
1172 } else {
1173 panic!("Expected a columnar array")
1174 }
1175
1176 let string_array = [
1178 ColumnarValue::Array(Arc::new(data.clone()) as ArrayRef),
1179 ColumnarValue::Scalar(ScalarValue::Int32(Some(1))),
1180 ColumnarValue::Scalar(ScalarValue::Int32(Some(2))),
1181 ColumnarValue::Scalar(ScalarValue::Int32(Some(3))),
1182 ];
1183
1184 let expected = "Unsupported data type Int32 for function".to_string();
1185 let actual = func(&string_array).unwrap_err().to_string();
1186 assert_contains!(actual, expected);
1187
1188 let string_array = [
1190 ColumnarValue::Array(Arc::new(data.clone()) as ArrayRef),
1191 ColumnarValue::Array(Arc::new(PrimitiveArray::<Int64Type>::new(
1192 vec![1i64].into(),
1193 None,
1194 )) as ArrayRef),
1195 ];
1196
1197 let expected = "Unsupported data type".to_string();
1198 let actual = func(&string_array).unwrap_err().to_string();
1199 assert_contains!(actual, expected);
1200 }
1201 }
1202}