1use std::sync::Arc;
19
20use crate::datetime::common::*;
21use arrow::array::timezone::Tz;
22use arrow::array::{
23 Array, Decimal128Array, Float16Array, Float32Array, Float64Array,
24 TimestampNanosecondArray,
25};
26use arrow::datatypes::DataType::*;
27use arrow::datatypes::TimeUnit::{Microsecond, Millisecond, Nanosecond, Second};
28use arrow::datatypes::{
29 ArrowTimestampType, DataType, TimestampMicrosecondType, TimestampMillisecondType,
30 TimestampNanosecondType, TimestampSecondType,
31};
32use datafusion_common::config::ConfigOptions;
33use datafusion_common::{Result, ScalarType, ScalarValue, exec_err};
34use datafusion_expr::{
35 ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl,
36 Signature, Volatility,
37};
38use datafusion_macros::user_doc;
39
40#[user_doc(
41 doc_section(label = "Time and Date Functions"),
42 description = r#"
43Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000000<TZ>`) in the session time zone. Supports strings,
44integer, unsigned integer, and double types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00')
45if no [Chrono formats](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) are provided.
46Strings that parse without a time zone are treated as if they are in the
47session time zone, or UTC if no session time zone is set.
48Integers, unsigned integers, and doubles are interpreted as seconds since the unix epoch (`1970-01-01T00:00:00Z`).
49
50Note: `to_timestamp` returns `Timestamp(ns, TimeZone)` where the time zone is the session time zone. The supported range
51for integer input is between`-9223372037` and `9223372036`. Supported range for string input is between
52`1677-09-21T00:12:44.0` and `2262-04-11T23:47:16.0`. Please use `to_timestamp_seconds`
53for the input outside of supported bounds.
54
55The session time zone can be set using the statement `SET TIMEZONE = 'desired time zone'`.
56The time zone can be a value like +00:00, 'Europe/London' etc.
57"#,
58 syntax_example = "to_timestamp(expression[, ..., format_n])",
59 sql_example = r#"```sql
60> select to_timestamp('2023-01-31T09:26:56.123456789-05:00');
61+-----------------------------------------------------------+
62| to_timestamp(Utf8("2023-01-31T09:26:56.123456789-05:00")) |
63+-----------------------------------------------------------+
64| 2023-01-31T14:26:56.123456789 |
65+-----------------------------------------------------------+
66> select to_timestamp('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y');
67+--------------------------------------------------------------------------------------------------------+
68| to_timestamp(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) |
69+--------------------------------------------------------------------------------------------------------+
70| 2023-05-17T03:59:00.123456789 |
71+--------------------------------------------------------------------------------------------------------+
72```
73Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/builtin_functions/date_time.rs)
74"#,
75 argument(
76 name = "expression",
77 description = "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators."
78 ),
79 argument(
80 name = "format_n",
81 description = r#"
82Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression.
83Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully
84parse the expression an error will be returned. Note: parsing of named timezones (e.g. 'America/New_York') using %Z is
85only supported at the end of the string preceded by a space.
86"#
87 )
88)]
89#[derive(Debug, PartialEq, Eq, Hash)]
90pub struct ToTimestampFunc {
91 signature: Signature,
92 timezone: Option<Arc<str>>,
93}
94
95#[user_doc(
96 doc_section(label = "Time and Date Functions"),
97 description = r#"
98Converts a value to a timestamp (`YYYY-MM-DDT00:00:00<TZ>`) in the session time zone. Supports strings,
99integer, unsigned integer, and double types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00')
100if no [Chrono formats](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) are provided.
101Strings that parse without a time zone are treated as if they are in the
102session time zone, or UTC if no session time zone is set.
103Integers, unsigned integers, and doubles are interpreted as seconds since the unix epoch (`1970-01-01T00:00:00Z`).
104
105The session time zone can be set using the statement `SET TIMEZONE = 'desired time zone'`.
106The time zone can be a value like +00:00, 'Europe/London' etc.
107"#,
108 syntax_example = "to_timestamp_seconds(expression[, ..., format_n])",
109 sql_example = r#"```sql
110> select to_timestamp_seconds('2023-01-31T09:26:56.123456789-05:00');
111+-------------------------------------------------------------------+
112| to_timestamp_seconds(Utf8("2023-01-31T09:26:56.123456789-05:00")) |
113+-------------------------------------------------------------------+
114| 2023-01-31T14:26:56 |
115+-------------------------------------------------------------------+
116> select to_timestamp_seconds('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y');
117+----------------------------------------------------------------------------------------------------------------+
118| to_timestamp_seconds(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) |
119+----------------------------------------------------------------------------------------------------------------+
120| 2023-05-17T03:59:00 |
121+----------------------------------------------------------------------------------------------------------------+
122```
123Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/builtin_functions/date_time.rs)
124"#,
125 argument(
126 name = "expression",
127 description = "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators."
128 ),
129 argument(
130 name = "format_n",
131 description = r#"
132Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression.
133Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully
134parse the expression an error will be returned. Note: parsing of named timezones (e.g. 'America/New_York') using %Z is
135only supported at the end of the string preceded by a space.
136"#
137 )
138)]
139#[derive(Debug, PartialEq, Eq, Hash)]
140pub struct ToTimestampSecondsFunc {
141 signature: Signature,
142 timezone: Option<Arc<str>>,
143}
144
145#[user_doc(
146 doc_section(label = "Time and Date Functions"),
147 description = r#"
148Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000<TZ>`) in the session time zone. Supports strings,
149integer, unsigned integer, and double types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00')
150if no [Chrono formats](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) are provided.
151Strings that parse without a time zone are treated as if they are in the
152session time zone, or UTC if no session time zone is set.
153Integers, unsigned integers, and doubles are interpreted as milliseconds since the unix epoch (`1970-01-01T00:00:00Z`).
154
155The session time zone can be set using the statement `SET TIMEZONE = 'desired time zone'`.
156The time zone can be a value like +00:00, 'Europe/London' etc.
157"#,
158 syntax_example = "to_timestamp_millis(expression[, ..., format_n])",
159 sql_example = r#"```sql
160> select to_timestamp_millis('2023-01-31T09:26:56.123456789-05:00');
161+------------------------------------------------------------------+
162| to_timestamp_millis(Utf8("2023-01-31T09:26:56.123456789-05:00")) |
163+------------------------------------------------------------------+
164| 2023-01-31T14:26:56.123 |
165+------------------------------------------------------------------+
166> select to_timestamp_millis('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y');
167+---------------------------------------------------------------------------------------------------------------+
168| to_timestamp_millis(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) |
169+---------------------------------------------------------------------------------------------------------------+
170| 2023-05-17T03:59:00.123 |
171+---------------------------------------------------------------------------------------------------------------+
172```
173Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/builtin_functions/date_time.rs)
174"#,
175 argument(
176 name = "expression",
177 description = "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators."
178 ),
179 argument(
180 name = "format_n",
181 description = r#"
182Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression.
183Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully
184parse the expression an error will be returned. Note: parsing of named timezones (e.g. 'America/New_York') using %Z is
185only supported at the end of the string preceded by a space.
186"#
187 )
188)]
189#[derive(Debug, PartialEq, Eq, Hash)]
190pub struct ToTimestampMillisFunc {
191 signature: Signature,
192 timezone: Option<Arc<str>>,
193}
194
195#[user_doc(
196 doc_section(label = "Time and Date Functions"),
197 description = r#"
198Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000000<TZ>`) in the session time zone. Supports strings,
199integer, unsigned integer, and double types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00')
200if no [Chrono formats](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) are provided.
201Strings that parse without a time zone are treated as if they are in the
202session time zone, or UTC if no session time zone is set.
203Integers, unsigned integers, and doubles are interpreted as microseconds since the unix epoch (`1970-01-01T00:00:00Z`).
204
205The session time zone can be set using the statement `SET TIMEZONE = 'desired time zone'`.
206The time zone can be a value like +00:00, 'Europe/London' etc.
207"#,
208 syntax_example = "to_timestamp_micros(expression[, ..., format_n])",
209 sql_example = r#"```sql
210> select to_timestamp_micros('2023-01-31T09:26:56.123456789-05:00');
211+------------------------------------------------------------------+
212| to_timestamp_micros(Utf8("2023-01-31T09:26:56.123456789-05:00")) |
213+------------------------------------------------------------------+
214| 2023-01-31T14:26:56.123456 |
215+------------------------------------------------------------------+
216> select to_timestamp_micros('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y');
217+---------------------------------------------------------------------------------------------------------------+
218| to_timestamp_micros(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) |
219+---------------------------------------------------------------------------------------------------------------+
220| 2023-05-17T03:59:00.123456 |
221+---------------------------------------------------------------------------------------------------------------+
222```
223Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/builtin_functions/date_time.rs)
224"#,
225 argument(
226 name = "expression",
227 description = "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators."
228 ),
229 argument(
230 name = "format_n",
231 description = r#"
232Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression.
233Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully
234parse the expression an error will be returned. Note: parsing of named timezones (e.g. 'America/New_York') using %Z is
235only supported at the end of the string preceded by a space.
236"#
237 )
238)]
239#[derive(Debug, PartialEq, Eq, Hash)]
240pub struct ToTimestampMicrosFunc {
241 signature: Signature,
242 timezone: Option<Arc<str>>,
243}
244
245#[user_doc(
246 doc_section(label = "Time and Date Functions"),
247 description = r#"
248Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000000000<TZ>`) in the session time zone. Supports strings,
249integer, unsigned integer, and double types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00')
250if no [Chrono formats](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) are provided.
251Strings that parse without a time zone are treated as if they are in the
252session time zone. Integers, unsigned integers, and doubles are interpreted as nanoseconds since the unix epoch (`1970-01-01T00:00:00Z`).
253
254The session time zone can be set using the statement `SET TIMEZONE = 'desired time zone'`.
255The time zone can be a value like +00:00, 'Europe/London' etc.
256"#,
257 syntax_example = "to_timestamp_nanos(expression[, ..., format_n])",
258 sql_example = r#"```sql
259> select to_timestamp_nanos('2023-01-31T09:26:56.123456789-05:00');
260+-----------------------------------------------------------------+
261| to_timestamp_nanos(Utf8("2023-01-31T09:26:56.123456789-05:00")) |
262+-----------------------------------------------------------------+
263| 2023-01-31T14:26:56.123456789 |
264+-----------------------------------------------------------------+
265> select to_timestamp_nanos('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y');
266+--------------------------------------------------------------------------------------------------------------+
267| to_timestamp_nanos(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) |
268+--------------------------------------------------------------------------------------------------------------+
269| 2023-05-17T03:59:00.123456789 |
270+---------------------------------------------------------------------------------------------------------------+
271```
272Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/builtin_functions/date_time.rs)
273"#,
274 argument(
275 name = "expression",
276 description = "Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators."
277 ),
278 argument(
279 name = "format_n",
280 description = r#"
281Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression.
282Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully
283parse the expression an error will be returned. Note: parsing of named timezones (e.g. 'America/New_York') using %Z is
284only supported at the end of the string preceded by a space.
285"#
286 )
287)]
288#[derive(Debug, PartialEq, Eq, Hash)]
289pub struct ToTimestampNanosFunc {
290 signature: Signature,
291 timezone: Option<Arc<str>>,
292}
293
294macro_rules! impl_to_timestamp_constructors {
297 ($func:ty) => {
298 impl Default for $func {
299 fn default() -> Self {
300 Self::new_with_config(&ConfigOptions::default())
301 }
302 }
303
304 impl $func {
305 #[deprecated(since = "52.0.0", note = "use `new_with_config` instead")]
306 pub fn new() -> Self {
312 Self::new_with_config(&ConfigOptions::default())
313 }
314
315 pub fn new_with_config(config: &ConfigOptions) -> Self {
316 Self {
317 signature: Signature::variadic_any(Volatility::Immutable),
318 timezone: config
319 .execution
320 .time_zone
321 .as_ref()
322 .map(|tz| Arc::from(tz.as_str())),
323 }
324 }
325 }
326 };
327}
328
329impl_to_timestamp_constructors!(ToTimestampFunc);
330impl_to_timestamp_constructors!(ToTimestampSecondsFunc);
331impl_to_timestamp_constructors!(ToTimestampMillisFunc);
332impl_to_timestamp_constructors!(ToTimestampMicrosFunc);
333impl_to_timestamp_constructors!(ToTimestampNanosFunc);
334
335fn decimal_to_nanoseconds(value: i128, scale: i8) -> i64 {
336 let nanos_exponent = 9_i16 - scale as i16;
337 let timestamp_nanos = if nanos_exponent >= 0 {
338 value * 10_i128.pow(nanos_exponent as u32)
339 } else {
340 value / 10_i128.pow(nanos_exponent.unsigned_abs() as u32)
341 };
342 timestamp_nanos as i64
343}
344
345fn decimal128_to_timestamp_nanos(
346 arg: &ColumnarValue,
347 tz: Option<Arc<str>>,
348) -> Result<ColumnarValue> {
349 match arg {
350 ColumnarValue::Scalar(ScalarValue::Decimal128(Some(value), _, scale)) => {
351 let timestamp_nanos = decimal_to_nanoseconds(*value, *scale);
352 Ok(ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(
353 Some(timestamp_nanos),
354 tz,
355 )))
356 }
357 ColumnarValue::Scalar(ScalarValue::Decimal128(None, _, _)) => Ok(
358 ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(None, tz)),
359 ),
360 ColumnarValue::Array(arr) => {
361 let decimal_arr = downcast_arg!(arr, Decimal128Array);
362 let scale = decimal_arr.scale();
363 let result: TimestampNanosecondArray = decimal_arr
364 .iter()
365 .map(|v| v.map(|val| decimal_to_nanoseconds(val, scale)))
366 .collect();
367 let result = result.with_timezone_opt(tz);
368 Ok(ColumnarValue::Array(Arc::new(result)))
369 }
370 _ => exec_err!("Invalid Decimal128 value for to_timestamp"),
371 }
372}
373
374macro_rules! impl_with_updated_config {
382 () => {
383 fn with_updated_config(&self, config: &ConfigOptions) -> Option<ScalarUDF> {
384 Some(Self::new_with_config(config).into())
385 }
386 };
387}
388
389impl ScalarUDFImpl for ToTimestampFunc {
390 fn name(&self) -> &str {
391 "to_timestamp"
392 }
393
394 fn signature(&self) -> &Signature {
395 &self.signature
396 }
397
398 fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
399 Ok(Timestamp(Nanosecond, self.timezone.clone()))
400 }
401
402 impl_with_updated_config!();
403
404 fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
405 let ScalarFunctionArgs { args, .. } = args;
406
407 if args.is_empty() {
408 return exec_err!(
409 "to_timestamp function requires 1 or more arguments, got {}",
410 args.len()
411 );
412 }
413
414 if args.len() > 1 {
416 validate_data_types(&args, "to_timestamp")?;
417 }
418
419 let tz = self.timezone.clone();
420
421 match args[0].data_type() {
422 Int8 | Int16 | Int32 | Int64 | UInt8 | UInt16 | UInt32 | UInt64 => args[0]
423 .cast_to(&Timestamp(Second, None), None)?
424 .cast_to(&Timestamp(Nanosecond, tz), None),
425 Null | Timestamp(_, _) => args[0].cast_to(&Timestamp(Nanosecond, tz), None),
426 Float16 => match &args[0] {
427 ColumnarValue::Scalar(ScalarValue::Float16(value)) => {
428 let timestamp_nanos =
429 value.map(|v| (v.to_f64() * 1_000_000_000.0) as i64);
430 Ok(ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(
431 timestamp_nanos,
432 tz,
433 )))
434 }
435 ColumnarValue::Array(arr) => {
436 let f16_arr = downcast_arg!(arr, Float16Array);
437 let result: TimestampNanosecondArray =
438 f16_arr.unary(|x| (x.to_f64() * 1_000_000_000.0) as i64);
439 Ok(ColumnarValue::Array(Arc::new(result.with_timezone_opt(tz))))
440 }
441 _ => exec_err!("Invalid Float16 value for to_timestamp"),
442 },
443 Float32 => match &args[0] {
444 ColumnarValue::Scalar(ScalarValue::Float32(value)) => {
445 let timestamp_nanos =
446 value.map(|v| (v as f64 * 1_000_000_000.0) as i64);
447 Ok(ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(
448 timestamp_nanos,
449 tz,
450 )))
451 }
452 ColumnarValue::Array(arr) => {
453 let f32_arr = downcast_arg!(arr, Float32Array);
454 let result: TimestampNanosecondArray =
455 f32_arr.unary(|x| (x as f64 * 1_000_000_000.0) as i64);
456 Ok(ColumnarValue::Array(Arc::new(result.with_timezone_opt(tz))))
457 }
458 _ => exec_err!("Invalid Float32 value for to_timestamp"),
459 },
460 Float64 => match &args[0] {
461 ColumnarValue::Scalar(ScalarValue::Float64(value)) => {
462 let timestamp_nanos = value.map(|v| (v * 1_000_000_000.0) as i64);
463 Ok(ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(
464 timestamp_nanos,
465 tz,
466 )))
467 }
468 ColumnarValue::Array(arr) => {
469 let f64_arr = downcast_arg!(arr, Float64Array);
470 let result: TimestampNanosecondArray =
471 f64_arr.unary(|x| (x * 1_000_000_000.0) as i64);
472 Ok(ColumnarValue::Array(Arc::new(result.with_timezone_opt(tz))))
473 }
474 _ => exec_err!("Invalid Float64 value for to_timestamp"),
475 },
476 Decimal32(_, _) | Decimal64(_, _) | Decimal256(_, _) => {
477 let arg = args[0].cast_to(&Decimal128(38, 9), None)?;
478 decimal128_to_timestamp_nanos(&arg, tz)
479 }
480 Decimal128(_, _) => decimal128_to_timestamp_nanos(&args[0], tz),
481 Utf8View | LargeUtf8 | Utf8 => {
482 to_timestamp_impl::<TimestampNanosecondType>(&args, "to_timestamp", &tz)
483 }
484 other => {
485 exec_err!("Unsupported data type {other} for function to_timestamp")
486 }
487 }
488 }
489
490 fn documentation(&self) -> Option<&Documentation> {
491 self.doc()
492 }
493}
494
495impl ScalarUDFImpl for ToTimestampSecondsFunc {
496 fn name(&self) -> &str {
497 "to_timestamp_seconds"
498 }
499
500 fn signature(&self) -> &Signature {
501 &self.signature
502 }
503
504 fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
505 Ok(Timestamp(Second, self.timezone.clone()))
506 }
507
508 impl_with_updated_config!();
509
510 fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
511 let ScalarFunctionArgs { args, .. } = args;
512
513 if args.is_empty() {
514 return exec_err!(
515 "to_timestamp_seconds function requires 1 or more arguments, got {}",
516 args.len()
517 );
518 }
519
520 if args.len() > 1 {
522 validate_data_types(&args, "to_timestamp")?;
523 }
524
525 let tz = self.timezone.clone();
526
527 match args[0].data_type() {
528 Null
529 | Int8
530 | Int16
531 | Int32
532 | Int64
533 | UInt8
534 | UInt16
535 | UInt32
536 | UInt64
537 | Timestamp(_, _)
538 | Decimal32(_, _)
539 | Decimal64(_, _)
540 | Decimal128(_, _)
541 | Decimal256(_, _) => args[0].cast_to(&Timestamp(Second, tz), None),
542 Float16 | Float32 | Float64 => args[0]
543 .cast_to(&Int64, None)?
544 .cast_to(&Timestamp(Second, tz), None),
545 Utf8View | LargeUtf8 | Utf8 => to_timestamp_impl::<TimestampSecondType>(
546 &args,
547 "to_timestamp_seconds",
548 &self.timezone,
549 ),
550 other => {
551 exec_err!(
552 "Unsupported data type {} for function to_timestamp_seconds",
553 other
554 )
555 }
556 }
557 }
558
559 fn documentation(&self) -> Option<&Documentation> {
560 self.doc()
561 }
562}
563
564impl ScalarUDFImpl for ToTimestampMillisFunc {
565 fn name(&self) -> &str {
566 "to_timestamp_millis"
567 }
568
569 fn signature(&self) -> &Signature {
570 &self.signature
571 }
572
573 fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
574 Ok(Timestamp(Millisecond, self.timezone.clone()))
575 }
576
577 impl_with_updated_config!();
578
579 fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
580 let ScalarFunctionArgs { args, .. } = args;
581
582 if args.is_empty() {
583 return exec_err!(
584 "to_timestamp_millis function requires 1 or more arguments, got {}",
585 args.len()
586 );
587 }
588
589 if args.len() > 1 {
591 validate_data_types(&args, "to_timestamp")?;
592 }
593
594 match args[0].data_type() {
595 Null
596 | Int8
597 | Int16
598 | Int32
599 | Int64
600 | UInt8
601 | UInt16
602 | UInt32
603 | UInt64
604 | Timestamp(_, _)
605 | Decimal32(_, _)
606 | Decimal64(_, _)
607 | Decimal128(_, _)
608 | Decimal256(_, _) => {
609 args[0].cast_to(&Timestamp(Millisecond, self.timezone.clone()), None)
610 }
611 Float16 | Float32 | Float64 => args[0]
612 .cast_to(&Int64, None)?
613 .cast_to(&Timestamp(Millisecond, self.timezone.clone()), None),
614 Utf8View | LargeUtf8 | Utf8 => to_timestamp_impl::<TimestampMillisecondType>(
615 &args,
616 "to_timestamp_millis",
617 &self.timezone,
618 ),
619 other => {
620 exec_err!(
621 "Unsupported data type {} for function to_timestamp_millis",
622 other
623 )
624 }
625 }
626 }
627
628 fn documentation(&self) -> Option<&Documentation> {
629 self.doc()
630 }
631}
632
633impl ScalarUDFImpl for ToTimestampMicrosFunc {
634 fn name(&self) -> &str {
635 "to_timestamp_micros"
636 }
637
638 fn signature(&self) -> &Signature {
639 &self.signature
640 }
641
642 fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
643 Ok(Timestamp(Microsecond, self.timezone.clone()))
644 }
645
646 impl_with_updated_config!();
647
648 fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
649 let ScalarFunctionArgs { args, .. } = args;
650
651 if args.is_empty() {
652 return exec_err!(
653 "to_timestamp_micros function requires 1 or more arguments, got {}",
654 args.len()
655 );
656 }
657
658 if args.len() > 1 {
660 validate_data_types(&args, "to_timestamp")?;
661 }
662
663 match args[0].data_type() {
664 Null
665 | Int8
666 | Int16
667 | Int32
668 | Int64
669 | UInt8
670 | UInt16
671 | UInt32
672 | UInt64
673 | Timestamp(_, _)
674 | Decimal32(_, _)
675 | Decimal64(_, _)
676 | Decimal128(_, _)
677 | Decimal256(_, _) => {
678 args[0].cast_to(&Timestamp(Microsecond, self.timezone.clone()), None)
679 }
680 Float16 | Float32 | Float64 => args[0]
681 .cast_to(&Int64, None)?
682 .cast_to(&Timestamp(Microsecond, self.timezone.clone()), None),
683 Utf8View | LargeUtf8 | Utf8 => to_timestamp_impl::<TimestampMicrosecondType>(
684 &args,
685 "to_timestamp_micros",
686 &self.timezone,
687 ),
688 other => {
689 exec_err!(
690 "Unsupported data type {} for function to_timestamp_micros",
691 other
692 )
693 }
694 }
695 }
696
697 fn documentation(&self) -> Option<&Documentation> {
698 self.doc()
699 }
700}
701
702impl ScalarUDFImpl for ToTimestampNanosFunc {
703 fn name(&self) -> &str {
704 "to_timestamp_nanos"
705 }
706
707 fn signature(&self) -> &Signature {
708 &self.signature
709 }
710
711 fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
712 Ok(Timestamp(Nanosecond, self.timezone.clone()))
713 }
714
715 impl_with_updated_config!();
716
717 fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
718 let ScalarFunctionArgs { args, .. } = args;
719
720 if args.is_empty() {
721 return exec_err!(
722 "to_timestamp_nanos function requires 1 or more arguments, got {}",
723 args.len()
724 );
725 }
726
727 if args.len() > 1 {
729 validate_data_types(&args, "to_timestamp")?;
730 }
731
732 match args[0].data_type() {
733 Null
734 | Int8
735 | Int16
736 | Int32
737 | Int64
738 | UInt8
739 | UInt16
740 | UInt32
741 | UInt64
742 | Timestamp(_, _)
743 | Decimal32(_, _)
744 | Decimal64(_, _)
745 | Decimal128(_, _)
746 | Decimal256(_, _) => {
747 args[0].cast_to(&Timestamp(Nanosecond, self.timezone.clone()), None)
748 }
749 Float16 | Float32 | Float64 => args[0]
750 .cast_to(&Int64, None)?
751 .cast_to(&Timestamp(Nanosecond, self.timezone.clone()), None),
752 Utf8View | LargeUtf8 | Utf8 => to_timestamp_impl::<TimestampNanosecondType>(
753 &args,
754 "to_timestamp_nanos",
755 &self.timezone,
756 ),
757 other => {
758 exec_err!(
759 "Unsupported data type {} for function to_timestamp_nanos",
760 other
761 )
762 }
763 }
764 }
765
766 fn documentation(&self) -> Option<&Documentation> {
767 self.doc()
768 }
769}
770
771fn to_timestamp_impl<T: ArrowTimestampType + ScalarType<i64>>(
772 args: &[ColumnarValue],
773 name: &str,
774 timezone: &Option<Arc<str>>,
775) -> Result<ColumnarValue> {
776 let factor = match T::UNIT {
777 Second => 1_000_000_000,
778 Millisecond => 1_000_000,
779 Microsecond => 1_000,
780 Nanosecond => 1,
781 };
782
783 let tz = match timezone.clone() {
784 Some(tz) => Some(tz.parse::<Tz>()?),
785 None => None,
786 };
787
788 match args.len() {
789 1 => handle::<T, _>(
790 args,
791 move |s| string_to_timestamp_nanos_with_timezone(&tz, s).map(|n| n / factor),
792 name,
793 &Timestamp(T::UNIT, timezone.clone()),
794 ),
795 n if n >= 2 => handle_multiple::<T, _, _>(
796 args,
797 move |s, format| {
798 string_to_timestamp_nanos_formatted_with_timezone(&tz, s, format)
799 },
800 |n| n / factor,
801 name,
802 &Timestamp(T::UNIT, timezone.clone()),
803 ),
804 _ => exec_err!("Unsupported 0 argument count for function {name}"),
805 }
806}
807
808#[cfg(test)]
809mod tests {
810
811 use arrow::array::types::Int64Type;
812 use arrow::array::{
813 Array, PrimitiveArray, TimestampMicrosecondArray, TimestampMillisecondArray,
814 TimestampNanosecondArray, TimestampSecondArray,
815 };
816 use arrow::array::{ArrayRef, Int64Array, StringBuilder};
817 use arrow::datatypes::{Field, TimeUnit};
818 use chrono::{DateTime, FixedOffset, Utc};
819 use datafusion_common::{DataFusionError, assert_contains};
820 use datafusion_expr::ScalarFunctionImplementation;
821
822 use super::*;
823
824 fn to_timestamp(args: &[ColumnarValue]) -> Result<ColumnarValue> {
825 let timezone: Option<Arc<str>> = Some("UTC".into());
826 to_timestamp_impl::<TimestampNanosecondType>(args, "to_timestamp", &timezone)
827 }
828
829 fn to_timestamp_millis(args: &[ColumnarValue]) -> Result<ColumnarValue> {
831 let timezone: Option<Arc<str>> = Some("UTC".into());
832 to_timestamp_impl::<TimestampMillisecondType>(
833 args,
834 "to_timestamp_millis",
835 &timezone,
836 )
837 }
838
839 fn to_timestamp_micros(args: &[ColumnarValue]) -> Result<ColumnarValue> {
841 let timezone: Option<Arc<str>> = Some("UTC".into());
842 to_timestamp_impl::<TimestampMicrosecondType>(
843 args,
844 "to_timestamp_micros",
845 &timezone,
846 )
847 }
848
849 fn to_timestamp_nanos(args: &[ColumnarValue]) -> Result<ColumnarValue> {
851 let timezone: Option<Arc<str>> = Some("UTC".into());
852 to_timestamp_impl::<TimestampNanosecondType>(
853 args,
854 "to_timestamp_nanos",
855 &timezone,
856 )
857 }
858
859 fn to_timestamp_seconds(args: &[ColumnarValue]) -> Result<ColumnarValue> {
861 let timezone: Option<Arc<str>> = Some("UTC".into());
862 to_timestamp_impl::<TimestampSecondType>(args, "to_timestamp_seconds", &timezone)
863 }
864
865 fn udfs_and_timeunit() -> Vec<(Box<dyn ScalarUDFImpl>, TimeUnit)> {
866 let udfs: Vec<(Box<dyn ScalarUDFImpl>, TimeUnit)> = vec![
867 (
868 Box::new(ToTimestampFunc::new_with_config(&ConfigOptions::default())),
869 Nanosecond,
870 ),
871 (
872 Box::new(ToTimestampSecondsFunc::new_with_config(
873 &ConfigOptions::default(),
874 )),
875 Second,
876 ),
877 (
878 Box::new(ToTimestampMillisFunc::new_with_config(
879 &ConfigOptions::default(),
880 )),
881 Millisecond,
882 ),
883 (
884 Box::new(ToTimestampMicrosFunc::new_with_config(
885 &ConfigOptions::default(),
886 )),
887 Microsecond,
888 ),
889 (
890 Box::new(ToTimestampNanosFunc::new_with_config(
891 &ConfigOptions::default(),
892 )),
893 Nanosecond,
894 ),
895 ];
896 udfs
897 }
898
899 fn validate_expected_error(
900 options: &mut ConfigOptions,
901 args: ScalarFunctionArgs,
902 expected_err: &str,
903 ) {
904 let udfs = udfs_and_timeunit();
905
906 for (udf, _) in udfs {
907 match udf
908 .with_updated_config(options)
909 .unwrap()
910 .invoke_with_args(args.clone())
911 {
912 Ok(_) => panic!("Expected error but got success"),
913 Err(e) => {
914 assert!(
915 e.to_string().contains(expected_err),
916 "Can not find expected error '{expected_err}'. Actual error '{e}'"
917 );
918 }
919 }
920 }
921 }
922
923 #[test]
924 fn to_timestamp_arrays_and_nulls() -> Result<()> {
925 let mut string_builder = StringBuilder::with_capacity(2, 1024);
928 let mut ts_builder = TimestampNanosecondArray::builder(2);
929
930 string_builder.append_value("2020-09-08T13:42:29.190855");
931 ts_builder.append_value(1599572549190855000);
932
933 string_builder.append_null();
934 ts_builder.append_null();
935 let expected_timestamps = &ts_builder.finish() as &dyn Array;
936
937 let string_array =
938 ColumnarValue::Array(Arc::new(string_builder.finish()) as ArrayRef);
939 let parsed_timestamps = to_timestamp(&[string_array])
940 .expect("that to_timestamp parsed values without error");
941 if let ColumnarValue::Array(parsed_array) = parsed_timestamps {
942 assert_eq!(parsed_array.len(), 2);
943 assert_eq!(expected_timestamps, parsed_array.as_ref());
944 } else {
945 panic!("Expected a columnar array")
946 }
947 Ok(())
948 }
949
950 #[test]
951 fn to_timestamp_with_formats_arrays_and_nulls() -> Result<()> {
952 let mut date_string_builder = StringBuilder::with_capacity(2, 1024);
955 let mut format1_builder = StringBuilder::with_capacity(2, 1024);
956 let mut format2_builder = StringBuilder::with_capacity(2, 1024);
957 let mut format3_builder = StringBuilder::with_capacity(2, 1024);
958 let mut ts_builder = TimestampNanosecondArray::builder(2);
959
960 date_string_builder.append_null();
961 format1_builder.append_null();
962 format2_builder.append_null();
963 format3_builder.append_null();
964 ts_builder.append_null();
965
966 date_string_builder.append_value("2020-09-08T13:42:29.19085Z");
967 format1_builder.append_value("%s");
968 format2_builder.append_value("%c");
969 format3_builder.append_value("%+");
970 ts_builder.append_value(1599572549190850000);
971
972 let expected_timestamps = &ts_builder.finish() as &dyn Array;
973
974 let string_array = [
975 ColumnarValue::Array(Arc::new(date_string_builder.finish()) as ArrayRef),
976 ColumnarValue::Array(Arc::new(format1_builder.finish()) as ArrayRef),
977 ColumnarValue::Array(Arc::new(format2_builder.finish()) as ArrayRef),
978 ColumnarValue::Array(Arc::new(format3_builder.finish()) as ArrayRef),
979 ];
980 let parsed_timestamps = to_timestamp(&string_array)
981 .expect("that to_timestamp with format args parsed values without error");
982 if let ColumnarValue::Array(parsed_array) = parsed_timestamps {
983 assert_eq!(parsed_array.len(), 2);
984 assert_eq!(expected_timestamps, parsed_array.as_ref());
985 } else {
986 panic!("Expected a columnar array")
987 }
988 Ok(())
989 }
990
991 #[test]
992 fn to_timestamp_respects_execution_timezone() -> Result<()> {
993 let udfs = udfs_and_timeunit();
994
995 let mut options = ConfigOptions::default();
996 options.execution.time_zone = Some("-05:00".to_string());
997
998 let time_zone: Option<Arc<str>> = options
999 .execution
1000 .time_zone
1001 .as_ref()
1002 .map(|tz| Arc::from(tz.as_str()));
1003
1004 for (udf, time_unit) in udfs {
1005 let field = Field::new("arg", Utf8, true).into();
1006
1007 let args = ScalarFunctionArgs {
1008 args: vec![ColumnarValue::Scalar(ScalarValue::Utf8(Some(
1009 "2020-09-08T13:42:29".to_string(),
1010 )))],
1011 arg_fields: vec![field],
1012 number_rows: 1,
1013 return_field: Field::new(
1014 "f",
1015 Timestamp(time_unit, Some("-05:00".into())),
1016 true,
1017 )
1018 .into(),
1019 config_options: Arc::new(options.clone()),
1020 };
1021
1022 let result = udf
1023 .with_updated_config(&options.clone())
1024 .unwrap()
1025 .invoke_with_args(args)?;
1026 let result = match time_unit {
1027 Second => {
1028 let ColumnarValue::Scalar(ScalarValue::TimestampSecond(
1029 Some(value),
1030 tz,
1031 )) = result
1032 else {
1033 panic!("expected scalar timestamp");
1034 };
1035
1036 assert_eq!(tz, time_zone);
1037
1038 value
1039 }
1040 Millisecond => {
1041 let ColumnarValue::Scalar(ScalarValue::TimestampMillisecond(
1042 Some(value),
1043 tz,
1044 )) = result
1045 else {
1046 panic!("expected scalar timestamp");
1047 };
1048
1049 assert_eq!(tz, time_zone);
1050
1051 value
1052 }
1053 Microsecond => {
1054 let ColumnarValue::Scalar(ScalarValue::TimestampMicrosecond(
1055 Some(value),
1056 tz,
1057 )) = result
1058 else {
1059 panic!("expected scalar timestamp");
1060 };
1061
1062 assert_eq!(tz, time_zone);
1063
1064 value
1065 }
1066 Nanosecond => {
1067 let ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(
1068 Some(value),
1069 tz,
1070 )) = result
1071 else {
1072 panic!("expected scalar timestamp");
1073 };
1074
1075 assert_eq!(tz, time_zone);
1076
1077 value
1078 }
1079 };
1080
1081 let scale = match time_unit {
1082 Second => 1_000_000_000,
1083 Millisecond => 1_000_000,
1084 Microsecond => 1_000,
1085 Nanosecond => 1,
1086 };
1087
1088 let offset = FixedOffset::west_opt(5 * 3600).unwrap();
1089 let result = Some(
1090 DateTime::<Utc>::from_timestamp_nanos(result * scale)
1091 .with_timezone(&offset)
1092 .to_string(),
1093 );
1094
1095 assert_eq!(result, Some("2020-09-08 13:42:29 -05:00".to_string()));
1096 }
1097
1098 Ok(())
1099 }
1100
1101 #[test]
1102 fn to_timestamp_formats_respects_execution_timezone() -> Result<()> {
1103 let udfs = udfs_and_timeunit();
1104
1105 let mut options = ConfigOptions::default();
1106 options.execution.time_zone = Some("-05:00".to_string());
1107
1108 let time_zone: Option<Arc<str>> = options
1109 .execution
1110 .time_zone
1111 .as_ref()
1112 .map(|tz| Arc::from(tz.as_str()));
1113
1114 let expr_field = Field::new("arg", Utf8, true).into();
1115 let format_field: Arc<Field> = Field::new("fmt", Utf8, true).into();
1116
1117 for (udf, time_unit) in udfs {
1118 for (value, format, expected_str) in [
1119 (
1120 "2020-09-08 09:42:29 -05:00",
1121 "%Y-%m-%d %H:%M:%S %z",
1122 Some("2020-09-08 09:42:29 -05:00"),
1123 ),
1124 (
1125 "2020-09-08T13:42:29Z",
1126 "%+",
1127 Some("2020-09-08 08:42:29 -05:00"),
1128 ),
1129 (
1130 "2020-09-08 13:42:29 UTC",
1131 "%Y-%m-%d %H:%M:%S %Z",
1132 Some("2020-09-08 08:42:29 -05:00"),
1133 ),
1134 (
1135 "+0000 2024-01-01 12:00:00",
1136 "%z %Y-%m-%d %H:%M:%S",
1137 Some("2024-01-01 07:00:00 -05:00"),
1138 ),
1139 (
1140 "20200908134229+0100",
1141 "%Y%m%d%H%M%S%z",
1142 Some("2020-09-08 07:42:29 -05:00"),
1143 ),
1144 (
1145 "2020-09-08+0230 13:42",
1146 "%Y-%m-%d%z %H:%M",
1147 Some("2020-09-08 06:12:00 -05:00"),
1148 ),
1149 ] {
1150 let args = ScalarFunctionArgs {
1151 args: vec![
1152 ColumnarValue::Scalar(ScalarValue::Utf8(Some(value.to_string()))),
1153 ColumnarValue::Scalar(ScalarValue::Utf8(Some(
1154 format.to_string(),
1155 ))),
1156 ],
1157 arg_fields: vec![Arc::clone(&expr_field), Arc::clone(&format_field)],
1158 number_rows: 1,
1159 return_field: Field::new(
1160 "f",
1161 Timestamp(time_unit, Some("-05:00".into())),
1162 true,
1163 )
1164 .into(),
1165 config_options: Arc::new(options.clone()),
1166 };
1167 let result = udf
1168 .with_updated_config(&options.clone())
1169 .unwrap()
1170 .invoke_with_args(args)?;
1171 let result = match time_unit {
1172 Second => {
1173 let ColumnarValue::Scalar(ScalarValue::TimestampSecond(
1174 Some(value),
1175 tz,
1176 )) = result
1177 else {
1178 panic!("expected scalar timestamp");
1179 };
1180
1181 assert_eq!(tz, time_zone);
1182
1183 value
1184 }
1185 Millisecond => {
1186 let ColumnarValue::Scalar(ScalarValue::TimestampMillisecond(
1187 Some(value),
1188 tz,
1189 )) = result
1190 else {
1191 panic!("expected scalar timestamp");
1192 };
1193
1194 assert_eq!(tz, time_zone);
1195
1196 value
1197 }
1198 Microsecond => {
1199 let ColumnarValue::Scalar(ScalarValue::TimestampMicrosecond(
1200 Some(value),
1201 tz,
1202 )) = result
1203 else {
1204 panic!("expected scalar timestamp");
1205 };
1206
1207 assert_eq!(tz, time_zone);
1208
1209 value
1210 }
1211 Nanosecond => {
1212 let ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(
1213 Some(value),
1214 tz,
1215 )) = result
1216 else {
1217 panic!("expected scalar timestamp");
1218 };
1219
1220 assert_eq!(tz, time_zone);
1221
1222 value
1223 }
1224 };
1225
1226 let scale = match time_unit {
1227 Second => 1_000_000_000,
1228 Millisecond => 1_000_000,
1229 Microsecond => 1_000,
1230 Nanosecond => 1,
1231 };
1232 let offset = FixedOffset::west_opt(5 * 3600).unwrap();
1233 let result = Some(
1234 DateTime::<Utc>::from_timestamp_nanos(result * scale)
1235 .with_timezone(&offset)
1236 .to_string(),
1237 );
1238
1239 assert_eq!(result, expected_str.map(|s| s.to_string()));
1240 }
1241 }
1242
1243 Ok(())
1244 }
1245
1246 #[test]
1247 fn to_timestamp_invalid_execution_timezone_behavior() -> Result<()> {
1248 let field: Arc<Field> = Field::new("arg", Utf8, true).into();
1249 let return_field: Arc<Field> =
1250 Field::new("f", Timestamp(Nanosecond, None), true).into();
1251
1252 let mut options = ConfigOptions::default();
1253 options.execution.time_zone = Some("Invalid/Timezone".to_string());
1254
1255 let args = ScalarFunctionArgs {
1256 args: vec![ColumnarValue::Scalar(ScalarValue::Utf8(Some(
1257 "2020-09-08T13:42:29Z".to_string(),
1258 )))],
1259 arg_fields: vec![Arc::clone(&field)],
1260 number_rows: 1,
1261 return_field: Arc::clone(&return_field),
1262 config_options: Arc::new(options.clone()),
1263 };
1264
1265 let expected_err =
1266 "Invalid timezone \"Invalid/Timezone\": failed to parse timezone";
1267
1268 validate_expected_error(&mut options, args, expected_err);
1269
1270 Ok(())
1271 }
1272
1273 #[test]
1274 fn to_timestamp_formats_invalid_execution_timezone_behavior() -> Result<()> {
1275 let expr_field: Arc<Field> = Field::new("arg", Utf8, true).into();
1276 let format_field: Arc<Field> = Field::new("fmt", Utf8, true).into();
1277 let return_field: Arc<Field> =
1278 Field::new("f", Timestamp(Nanosecond, None), true).into();
1279
1280 let mut options = ConfigOptions::default();
1281 options.execution.time_zone = Some("Invalid/Timezone".to_string());
1282
1283 let expected_err =
1284 "Invalid timezone \"Invalid/Timezone\": failed to parse timezone";
1285
1286 let make_args = |value: &str, format: &str| ScalarFunctionArgs {
1287 args: vec![
1288 ColumnarValue::Scalar(ScalarValue::Utf8(Some(value.to_string()))),
1289 ColumnarValue::Scalar(ScalarValue::Utf8(Some(format.to_string()))),
1290 ],
1291 arg_fields: vec![Arc::clone(&expr_field), Arc::clone(&format_field)],
1292 number_rows: 1,
1293 return_field: Arc::clone(&return_field),
1294 config_options: Arc::new(options.clone()),
1295 };
1296
1297 for (value, format, _expected_str) in [
1298 (
1299 "2020-09-08 09:42:29 -05:00",
1300 "%Y-%m-%d %H:%M:%S %z",
1301 Some("2020-09-08 09:42:29 -05:00"),
1302 ),
1303 (
1304 "2020-09-08T13:42:29Z",
1305 "%+",
1306 Some("2020-09-08 08:42:29 -05:00"),
1307 ),
1308 (
1309 "2020-09-08 13:42:29 +0000",
1310 "%Y-%m-%d %H:%M:%S %z",
1311 Some("2020-09-08 08:42:29 -05:00"),
1312 ),
1313 (
1314 "+0000 2024-01-01 12:00:00",
1315 "%z %Y-%m-%d %H:%M:%S",
1316 Some("2024-01-01 07:00:00 -05:00"),
1317 ),
1318 (
1319 "20200908134229+0100",
1320 "%Y%m%d%H%M%S%z",
1321 Some("2020-09-08 07:42:29 -05:00"),
1322 ),
1323 (
1324 "2020-09-08+0230 13:42",
1325 "%Y-%m-%d%z %H:%M",
1326 Some("2020-09-08 06:12:00 -05:00"),
1327 ),
1328 ] {
1329 let args = make_args(value, format);
1330 validate_expected_error(&mut options.clone(), args, expected_err);
1331 }
1332
1333 let args = ScalarFunctionArgs {
1334 args: vec![
1335 ColumnarValue::Scalar(ScalarValue::Utf8(Some(
1336 "2020-09-08T13:42:29".to_string(),
1337 ))),
1338 ColumnarValue::Scalar(ScalarValue::Utf8(Some(
1339 "%Y-%m-%dT%H:%M:%S".to_string(),
1340 ))),
1341 ],
1342 arg_fields: vec![Arc::clone(&expr_field), Arc::clone(&format_field)],
1343 number_rows: 1,
1344 return_field: Arc::clone(&return_field),
1345 config_options: Arc::new(options.clone()),
1346 };
1347
1348 validate_expected_error(&mut options.clone(), args, expected_err);
1349
1350 Ok(())
1351 }
1352
1353 #[test]
1354 fn to_timestamp_invalid_input_type() -> Result<()> {
1355 let mut builder = Int64Array::builder(1);
1359 builder.append_value(1);
1360 let int64array = ColumnarValue::Array(Arc::new(builder.finish()));
1361
1362 let expected_err =
1363 "Execution error: Unsupported data type Int64 for function to_timestamp";
1364 match to_timestamp(&[int64array]) {
1365 Ok(_) => panic!("Expected error but got success"),
1366 Err(e) => {
1367 assert!(
1368 e.to_string().contains(expected_err),
1369 "Can not find expected error '{expected_err}'. Actual error '{e}'"
1370 );
1371 }
1372 }
1373 Ok(())
1374 }
1375
1376 #[test]
1377 fn to_timestamp_with_formats_invalid_input_type() -> Result<()> {
1378 let mut builder = Int64Array::builder(1);
1382 builder.append_value(1);
1383 let int64array = [
1384 ColumnarValue::Array(Arc::new(builder.finish())),
1385 ColumnarValue::Array(Arc::new(builder.finish())),
1386 ];
1387
1388 let expected_err =
1389 "Execution error: Unsupported data type Int64 for function to_timestamp";
1390 match to_timestamp(&int64array) {
1391 Ok(_) => panic!("Expected error but got success"),
1392 Err(e) => {
1393 assert!(
1394 e.to_string().contains(expected_err),
1395 "Can not find expected error '{expected_err}'. Actual error '{e}'"
1396 );
1397 }
1398 }
1399 Ok(())
1400 }
1401
1402 #[test]
1403 fn to_timestamp_with_unparsable_data() -> Result<()> {
1404 let mut date_string_builder = StringBuilder::with_capacity(2, 1024);
1405
1406 date_string_builder.append_null();
1407
1408 date_string_builder.append_value("2020-09-08 - 13:42:29.19085Z");
1409
1410 let string_array =
1411 ColumnarValue::Array(Arc::new(date_string_builder.finish()) as ArrayRef);
1412
1413 let expected_err = "Arrow error: Parser error: Error parsing timestamp from '2020-09-08 - 13:42:29.19085Z': error parsing time";
1414 match to_timestamp(&[string_array]) {
1415 Ok(_) => panic!("Expected error but got success"),
1416 Err(e) => {
1417 assert!(
1418 e.to_string().contains(expected_err),
1419 "Can not find expected error '{expected_err}'. Actual error '{e}'"
1420 );
1421 }
1422 }
1423 Ok(())
1424 }
1425
1426 #[test]
1427 fn to_timestamp_with_invalid_tz() -> Result<()> {
1428 let mut date_string_builder = StringBuilder::with_capacity(2, 1024);
1429
1430 date_string_builder.append_null();
1431
1432 date_string_builder.append_value("2020-09-08T13:42:29ZZ");
1433
1434 let string_array =
1435 ColumnarValue::Array(Arc::new(date_string_builder.finish()) as ArrayRef);
1436
1437 let expected_err = "Arrow error: Parser error: Invalid timezone \"ZZ\": failed to parse timezone";
1438 match to_timestamp(&[string_array]) {
1439 Ok(_) => panic!("Expected error but got success"),
1440 Err(e) => {
1441 assert!(
1442 e.to_string().contains(expected_err),
1443 "Can not find expected error '{expected_err}'. Actual error '{e}'"
1444 );
1445 }
1446 }
1447 Ok(())
1448 }
1449
1450 #[test]
1451 fn to_timestamp_with_no_matching_formats() -> Result<()> {
1452 let mut date_string_builder = StringBuilder::with_capacity(2, 1024);
1453 let mut format1_builder = StringBuilder::with_capacity(2, 1024);
1454 let mut format2_builder = StringBuilder::with_capacity(2, 1024);
1455 let mut format3_builder = StringBuilder::with_capacity(2, 1024);
1456
1457 date_string_builder.append_null();
1458 format1_builder.append_null();
1459 format2_builder.append_null();
1460 format3_builder.append_null();
1461
1462 date_string_builder.append_value("2020-09-08T13:42:29.19085Z");
1463 format1_builder.append_value("%s");
1464 format2_builder.append_value("%c");
1465 format3_builder.append_value("%H:%M:%S");
1466
1467 let string_array = [
1468 ColumnarValue::Array(Arc::new(date_string_builder.finish()) as ArrayRef),
1469 ColumnarValue::Array(Arc::new(format1_builder.finish()) as ArrayRef),
1470 ColumnarValue::Array(Arc::new(format2_builder.finish()) as ArrayRef),
1471 ColumnarValue::Array(Arc::new(format3_builder.finish()) as ArrayRef),
1472 ];
1473
1474 let expected_err = "Execution error: Error parsing timestamp from '2020-09-08T13:42:29.19085Z' using format '%H:%M:%S': input contains invalid characters";
1475 match to_timestamp(&string_array) {
1476 Ok(_) => panic!("Expected error but got success"),
1477 Err(e) => {
1478 assert!(
1479 e.to_string().contains(expected_err),
1480 "Can not find expected error '{expected_err}'. Actual error '{e}'"
1481 );
1482 }
1483 }
1484 Ok(())
1485 }
1486
1487 #[test]
1488 fn string_to_timestamp_formatted() {
1489 assert_eq!(
1491 1599572549190855000,
1492 parse_timestamp_formatted("2020-09-08T13:42:29.190855+00:00", "%+").unwrap()
1493 );
1494 assert_eq!(
1495 1599572549190855000,
1496 parse_timestamp_formatted("2020-09-08T13:42:29.190855Z", "%+").unwrap()
1497 );
1498 assert_eq!(
1499 1599572549000000000,
1500 parse_timestamp_formatted("2020-09-08T13:42:29Z", "%+").unwrap()
1501 ); assert_eq!(
1503 1599590549190855000,
1504 parse_timestamp_formatted("2020-09-08T13:42:29.190855-05:00", "%+").unwrap()
1505 );
1506 assert_eq!(
1507 1599590549000000000,
1508 parse_timestamp_formatted("1599590549", "%s").unwrap()
1509 );
1510 assert_eq!(
1511 1599572549000000000,
1512 parse_timestamp_formatted("09-08-2020 13/42/29", "%m-%d-%Y %H/%M/%S")
1513 .unwrap()
1514 );
1515 assert_eq!(
1516 1642896000000000000,
1517 parse_timestamp_formatted("2022-01-23", "%Y-%m-%d").unwrap()
1518 );
1519 }
1520
1521 fn parse_timestamp_formatted(s: &str, format: &str) -> Result<i64, DataFusionError> {
1522 let result = string_to_timestamp_nanos_formatted_with_timezone(
1523 &Some("UTC".parse()?),
1524 s,
1525 format,
1526 );
1527 if let Err(e) = &result {
1528 eprintln!("Error parsing timestamp '{s}' using format '{format}': {e:?}");
1529 }
1530 result
1531 }
1532
1533 #[test]
1534 fn string_to_timestamp_formatted_invalid() {
1535 let cases = [
1537 ("", "%Y%m%d %H%M%S", "premature end of input"),
1538 ("SS", "%c", "premature end of input"),
1539 ("Wed, 18 Feb 2015 23:16:09 GMT", "", "trailing input"),
1540 (
1541 "Wed, 18 Feb 2015 23:16:09 GMT",
1542 "%XX",
1543 "input contains invalid characters",
1544 ),
1545 (
1546 "Wed, 18 Feb 2015 23:16:09 GMT",
1547 "%Y%m%d %H%M%S",
1548 "input contains invalid characters",
1549 ),
1550 ];
1551
1552 for (s, f, ctx) in cases {
1553 let expected = format!(
1554 "Execution error: Error parsing timestamp from '{s}' using format '{f}': {ctx}"
1555 );
1556 let actual = string_to_datetime_formatted(&Utc, s, f)
1557 .unwrap_err()
1558 .strip_backtrace();
1559 assert_eq!(actual, expected)
1560 }
1561 }
1562
1563 #[test]
1564 fn string_to_timestamp_invalid_arguments() {
1565 let cases = [
1567 ("", "%Y%m%d %H%M%S", "premature end of input"),
1568 ("SS", "%c", "premature end of input"),
1569 ("Wed, 18 Feb 2015 23:16:09 GMT", "", "trailing input"),
1570 (
1571 "Wed, 18 Feb 2015 23:16:09 GMT",
1572 "%XX",
1573 "input contains invalid characters",
1574 ),
1575 (
1576 "Wed, 18 Feb 2015 23:16:09 GMT",
1577 "%Y%m%d %H%M%S",
1578 "input contains invalid characters",
1579 ),
1580 ];
1581
1582 for (s, f, ctx) in cases {
1583 let expected = format!(
1584 "Execution error: Error parsing timestamp from '{s}' using format '{f}': {ctx}"
1585 );
1586 let actual = string_to_datetime_formatted(&Utc, s, f)
1587 .unwrap_err()
1588 .strip_backtrace();
1589 assert_eq!(actual, expected)
1590 }
1591 }
1592
1593 #[test]
1594 fn test_no_tz() {
1595 let udfs: Vec<Box<dyn ScalarUDFImpl>> = vec![
1596 Box::new(ToTimestampFunc::new_with_config(&ConfigOptions::default())),
1597 Box::new(ToTimestampSecondsFunc::new_with_config(
1598 &ConfigOptions::default(),
1599 )),
1600 Box::new(ToTimestampMillisFunc::new_with_config(
1601 &ConfigOptions::default(),
1602 )),
1603 Box::new(ToTimestampNanosFunc::new_with_config(
1604 &ConfigOptions::default(),
1605 )),
1606 Box::new(ToTimestampSecondsFunc::new_with_config(
1607 &ConfigOptions::default(),
1608 )),
1609 ];
1610
1611 let mut nanos_builder = TimestampNanosecondArray::builder(2);
1612 let mut millis_builder = TimestampMillisecondArray::builder(2);
1613 let mut micros_builder = TimestampMicrosecondArray::builder(2);
1614 let mut sec_builder = TimestampSecondArray::builder(2);
1615
1616 nanos_builder.append_value(1599572549190850000);
1617 millis_builder.append_value(1599572549190);
1618 micros_builder.append_value(1599572549190850);
1619 sec_builder.append_value(1599572549);
1620
1621 let nanos_timestamps =
1622 Arc::new(nanos_builder.finish().with_timezone("UTC")) as ArrayRef;
1623 let millis_timestamps =
1624 Arc::new(millis_builder.finish().with_timezone("UTC")) as ArrayRef;
1625 let micros_timestamps =
1626 Arc::new(micros_builder.finish().with_timezone("UTC")) as ArrayRef;
1627 let sec_timestamps =
1628 Arc::new(sec_builder.finish().with_timezone("UTC")) as ArrayRef;
1629
1630 let arrays = &[
1631 ColumnarValue::Array(Arc::clone(&nanos_timestamps)),
1632 ColumnarValue::Array(Arc::clone(&millis_timestamps)),
1633 ColumnarValue::Array(Arc::clone(µs_timestamps)),
1634 ColumnarValue::Array(Arc::clone(&sec_timestamps)),
1635 ];
1636
1637 for udf in &udfs {
1638 for array in arrays {
1639 let rt = udf.return_type(&[array.data_type()]).unwrap();
1640 let arg_field = Field::new("arg", array.data_type().clone(), true).into();
1641 assert!(matches!(rt, Timestamp(_, None)));
1642 let args = ScalarFunctionArgs {
1643 args: vec![array.clone()],
1644 arg_fields: vec![arg_field],
1645 number_rows: 4,
1646 return_field: Field::new("f", rt, true).into(),
1647 config_options: Arc::new(ConfigOptions::default()),
1648 };
1649 let res = udf
1650 .invoke_with_args(args)
1651 .expect("that to_timestamp parsed values without error");
1652 let array = match res {
1653 ColumnarValue::Array(res) => res,
1654 _ => panic!("Expected a columnar array"),
1655 };
1656 let ty = array.data_type();
1657 assert!(matches!(ty, Timestamp(_, None)));
1658 }
1659 }
1660
1661 let mut nanos_builder = TimestampNanosecondArray::builder(2);
1662 let mut millis_builder = TimestampMillisecondArray::builder(2);
1663 let mut micros_builder = TimestampMicrosecondArray::builder(2);
1664 let mut sec_builder = TimestampSecondArray::builder(2);
1665 let mut i64_builder = Int64Array::builder(2);
1666
1667 nanos_builder.append_value(1599572549190850000);
1668 millis_builder.append_value(1599572549190);
1669 micros_builder.append_value(1599572549190850);
1670 sec_builder.append_value(1599572549);
1671 i64_builder.append_value(1599572549);
1672
1673 let nanos_timestamps = Arc::new(nanos_builder.finish()) as ArrayRef;
1674 let millis_timestamps = Arc::new(millis_builder.finish()) as ArrayRef;
1675 let micros_timestamps = Arc::new(micros_builder.finish()) as ArrayRef;
1676 let sec_timestamps = Arc::new(sec_builder.finish()) as ArrayRef;
1677 let i64_timestamps = Arc::new(i64_builder.finish()) as ArrayRef;
1678
1679 let arrays = &[
1680 ColumnarValue::Array(Arc::clone(&nanos_timestamps)),
1681 ColumnarValue::Array(Arc::clone(&millis_timestamps)),
1682 ColumnarValue::Array(Arc::clone(µs_timestamps)),
1683 ColumnarValue::Array(Arc::clone(&sec_timestamps)),
1684 ColumnarValue::Array(Arc::clone(&i64_timestamps)),
1685 ];
1686
1687 for udf in &udfs {
1688 for array in arrays {
1689 let rt = udf.return_type(&[array.data_type()]).unwrap();
1690 assert!(matches!(rt, Timestamp(_, None)));
1691 let arg_field = Field::new("arg", array.data_type().clone(), true).into();
1692 let args = ScalarFunctionArgs {
1693 args: vec![array.clone()],
1694 arg_fields: vec![arg_field],
1695 number_rows: 5,
1696 return_field: Field::new("f", rt, true).into(),
1697 config_options: Arc::new(ConfigOptions::default()),
1698 };
1699 let res = udf
1700 .invoke_with_args(args)
1701 .expect("that to_timestamp parsed values without error");
1702 let array = match res {
1703 ColumnarValue::Array(res) => res,
1704 _ => panic!("Expected a columnar array"),
1705 };
1706 let ty = array.data_type();
1707 assert!(matches!(ty, Timestamp(_, None)));
1708 }
1709 }
1710 }
1711
1712 #[test]
1713 fn test_to_timestamp_arg_validation() {
1714 let mut date_string_builder = StringBuilder::with_capacity(2, 1024);
1715 date_string_builder.append_value("2020-09-08T13:42:29.19085Z");
1716
1717 let data = date_string_builder.finish();
1718
1719 let funcs: Vec<(ScalarFunctionImplementation, TimeUnit)> = vec![
1720 (Arc::new(to_timestamp), Nanosecond),
1721 (Arc::new(to_timestamp_micros), Microsecond),
1722 (Arc::new(to_timestamp_millis), Millisecond),
1723 (Arc::new(to_timestamp_nanos), Nanosecond),
1724 (Arc::new(to_timestamp_seconds), Second),
1725 ];
1726
1727 let mut nanos_builder = TimestampNanosecondArray::builder(2);
1728 let mut millis_builder = TimestampMillisecondArray::builder(2);
1729 let mut micros_builder = TimestampMicrosecondArray::builder(2);
1730 let mut sec_builder = TimestampSecondArray::builder(2);
1731
1732 nanos_builder.append_value(1599572549190850000);
1733 millis_builder.append_value(1599572549190);
1734 micros_builder.append_value(1599572549190850);
1735 sec_builder.append_value(1599572549);
1736
1737 let nanos_expected_timestamps = &nanos_builder.finish() as &dyn Array;
1738 let millis_expected_timestamps = &millis_builder.finish() as &dyn Array;
1739 let micros_expected_timestamps = µs_builder.finish() as &dyn Array;
1740 let sec_expected_timestamps = &sec_builder.finish() as &dyn Array;
1741
1742 for (func, time_unit) in funcs {
1743 let string_array = [
1745 ColumnarValue::Array(Arc::new(data.clone()) as ArrayRef),
1746 ColumnarValue::Scalar(ScalarValue::Utf8(Some("%s".to_string()))),
1747 ColumnarValue::Scalar(ScalarValue::Utf8(Some("%c".to_string()))),
1748 ColumnarValue::Scalar(ScalarValue::Utf8(Some("%+".to_string()))),
1749 ];
1750 let parsed_timestamps = func(&string_array)
1751 .expect("that to_timestamp with format args parsed values without error");
1752 if let ColumnarValue::Array(parsed_array) = parsed_timestamps {
1753 assert_eq!(parsed_array.len(), 1);
1754 match time_unit {
1755 Nanosecond => {
1756 assert_eq!(nanos_expected_timestamps, parsed_array.as_ref())
1757 }
1758 Millisecond => {
1759 assert_eq!(millis_expected_timestamps, parsed_array.as_ref())
1760 }
1761 Microsecond => {
1762 assert_eq!(micros_expected_timestamps, parsed_array.as_ref())
1763 }
1764 Second => {
1765 assert_eq!(sec_expected_timestamps, parsed_array.as_ref())
1766 }
1767 };
1768 } else {
1769 panic!("Expected a columnar array")
1770 }
1771
1772 let string_array = [
1774 ColumnarValue::Array(Arc::new(data.clone()) as ArrayRef),
1775 ColumnarValue::Scalar(ScalarValue::LargeUtf8(Some("%s".to_string()))),
1776 ColumnarValue::Scalar(ScalarValue::LargeUtf8(Some("%c".to_string()))),
1777 ColumnarValue::Scalar(ScalarValue::LargeUtf8(Some("%+".to_string()))),
1778 ];
1779 let parsed_timestamps = func(&string_array)
1780 .expect("that to_timestamp with format args parsed values without error");
1781 if let ColumnarValue::Array(parsed_array) = parsed_timestamps {
1782 assert_eq!(parsed_array.len(), 1);
1783 assert!(matches!(parsed_array.data_type(), Timestamp(_, None)));
1784
1785 match time_unit {
1786 Nanosecond => {
1787 assert_eq!(nanos_expected_timestamps, parsed_array.as_ref())
1788 }
1789 Millisecond => {
1790 assert_eq!(millis_expected_timestamps, parsed_array.as_ref())
1791 }
1792 Microsecond => {
1793 assert_eq!(micros_expected_timestamps, parsed_array.as_ref())
1794 }
1795 Second => {
1796 assert_eq!(sec_expected_timestamps, parsed_array.as_ref())
1797 }
1798 };
1799 } else {
1800 panic!("Expected a columnar array")
1801 }
1802
1803 let string_array = [
1805 ColumnarValue::Array(Arc::new(data.clone()) as ArrayRef),
1806 ColumnarValue::Scalar(ScalarValue::Int32(Some(1))),
1807 ColumnarValue::Scalar(ScalarValue::Int32(Some(2))),
1808 ColumnarValue::Scalar(ScalarValue::Int32(Some(3))),
1809 ];
1810
1811 let expected = "Unsupported data type Int32 for function".to_string();
1812 let actual = func(&string_array).unwrap_err().to_string();
1813 assert_contains!(actual, expected);
1814
1815 let string_array = [
1817 ColumnarValue::Array(Arc::new(data.clone()) as ArrayRef),
1818 ColumnarValue::Array(Arc::new(PrimitiveArray::<Int64Type>::new(
1819 vec![1i64].into(),
1820 None,
1821 )) as ArrayRef),
1822 ];
1823
1824 let expected = "Unsupported data type".to_string();
1825 let actual = func(&string_array).unwrap_err().to_string();
1826 assert_contains!(actual, expected);
1827 }
1828 }
1829
1830 #[test]
1831 fn test_decimal_to_nanoseconds_negative_scale() {
1832 let nanos = decimal_to_nanoseconds(5, -2);
1834 assert_eq!(nanos, 500_000_000_000); let nanos = decimal_to_nanoseconds(10, -1);
1838 assert_eq!(nanos, 100_000_000_000);
1839
1840 let nanos = decimal_to_nanoseconds(5, 0);
1842 assert_eq!(nanos, 5_000_000_000);
1843
1844 let nanos = decimal_to_nanoseconds(1500, 3);
1846 assert_eq!(nanos, 1_500_000_000);
1847 }
1848}