1use std::any::Any;
19use std::ops::{Add, Sub};
20use std::str::FromStr;
21use std::sync::Arc;
22
23use arrow::array::temporal_conversions::{
24 as_datetime_with_timezone, timestamp_ns_to_datetime,
25};
26use arrow::array::timezone::Tz;
27use arrow::array::types::{
28 ArrowTimestampType, TimestampMicrosecondType, TimestampMillisecondType,
29 TimestampNanosecondType, TimestampSecondType,
30};
31use arrow::array::{Array, ArrayRef, Int64Array, PrimitiveArray};
32use arrow::datatypes::DataType::{self, Null, Timestamp, Utf8, Utf8View};
33use arrow::datatypes::TimeUnit::{self, Microsecond, Millisecond, Nanosecond, Second};
34use datafusion_common::cast::as_primitive_array;
35use datafusion_common::{exec_err, plan_err, DataFusionError, Result, ScalarValue};
36use datafusion_expr::sort_properties::{ExprProperties, SortProperties};
37use datafusion_expr::TypeSignature::Exact;
38use datafusion_expr::{
39 ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, TIMEZONE_WILDCARD,
40};
41use datafusion_macros::user_doc;
42
43use chrono::{
44 DateTime, Datelike, Duration, LocalResult, NaiveDateTime, Offset, TimeDelta, Timelike,
45};
46
47#[user_doc(
48 doc_section(label = "Time and Date Functions"),
49 description = "Truncates a timestamp value to a specified precision.",
50 syntax_example = "date_trunc(precision, expression)",
51 argument(
52 name = "precision",
53 description = r#"Time precision to truncate to. The following precisions are supported:
54
55 - year / YEAR
56 - quarter / QUARTER
57 - month / MONTH
58 - week / WEEK
59 - day / DAY
60 - hour / HOUR
61 - minute / MINUTE
62 - second / SECOND
63 - millisecond / MILLISECOND
64 - microsecond / MICROSECOND
65"#
66 ),
67 argument(
68 name = "expression",
69 description = "Time expression to operate on. Can be a constant, column, or function."
70 )
71)]
72#[derive(Debug, PartialEq, Eq, Hash)]
73pub struct DateTruncFunc {
74 signature: Signature,
75 aliases: Vec<String>,
76}
77
78impl Default for DateTruncFunc {
79 fn default() -> Self {
80 Self::new()
81 }
82}
83
84impl DateTruncFunc {
85 pub fn new() -> Self {
86 Self {
87 signature: Signature::one_of(
88 vec![
89 Exact(vec![Utf8, Timestamp(Nanosecond, None)]),
90 Exact(vec![Utf8View, Timestamp(Nanosecond, None)]),
91 Exact(vec![
92 Utf8,
93 Timestamp(Nanosecond, Some(TIMEZONE_WILDCARD.into())),
94 ]),
95 Exact(vec![
96 Utf8View,
97 Timestamp(Nanosecond, Some(TIMEZONE_WILDCARD.into())),
98 ]),
99 Exact(vec![Utf8, Timestamp(Microsecond, None)]),
100 Exact(vec![Utf8View, Timestamp(Microsecond, None)]),
101 Exact(vec![
102 Utf8,
103 Timestamp(Microsecond, Some(TIMEZONE_WILDCARD.into())),
104 ]),
105 Exact(vec![
106 Utf8View,
107 Timestamp(Microsecond, Some(TIMEZONE_WILDCARD.into())),
108 ]),
109 Exact(vec![Utf8, Timestamp(Millisecond, None)]),
110 Exact(vec![Utf8View, Timestamp(Millisecond, None)]),
111 Exact(vec![
112 Utf8,
113 Timestamp(Millisecond, Some(TIMEZONE_WILDCARD.into())),
114 ]),
115 Exact(vec![
116 Utf8View,
117 Timestamp(Millisecond, Some(TIMEZONE_WILDCARD.into())),
118 ]),
119 Exact(vec![Utf8, Timestamp(Second, None)]),
120 Exact(vec![Utf8View, Timestamp(Second, None)]),
121 Exact(vec![
122 Utf8,
123 Timestamp(Second, Some(TIMEZONE_WILDCARD.into())),
124 ]),
125 Exact(vec![
126 Utf8View,
127 Timestamp(Second, Some(TIMEZONE_WILDCARD.into())),
128 ]),
129 ],
130 Volatility::Immutable,
131 ),
132 aliases: vec![String::from("datetrunc")],
133 }
134 }
135}
136
137impl ScalarUDFImpl for DateTruncFunc {
138 fn as_any(&self) -> &dyn Any {
139 self
140 }
141
142 fn name(&self) -> &str {
143 "date_trunc"
144 }
145
146 fn signature(&self) -> &Signature {
147 &self.signature
148 }
149
150 fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
151 match &arg_types[1] {
152 Timestamp(Nanosecond, None) | Utf8 | DataType::Date32 | Null => {
153 Ok(Timestamp(Nanosecond, None))
154 }
155 Timestamp(Nanosecond, tz_opt) => Ok(Timestamp(Nanosecond, tz_opt.clone())),
156 Timestamp(Microsecond, tz_opt) => Ok(Timestamp(Microsecond, tz_opt.clone())),
157 Timestamp(Millisecond, tz_opt) => Ok(Timestamp(Millisecond, tz_opt.clone())),
158 Timestamp(Second, tz_opt) => Ok(Timestamp(Second, tz_opt.clone())),
159 _ => plan_err!(
160 "The date_trunc function can only accept timestamp as the second arg."
161 ),
162 }
163 }
164
165 fn invoke_with_args(
166 &self,
167 args: datafusion_expr::ScalarFunctionArgs,
168 ) -> Result<ColumnarValue> {
169 let args = args.args;
170 let (granularity, array) = (&args[0], &args[1]);
171
172 let granularity = if let ColumnarValue::Scalar(ScalarValue::Utf8(Some(v))) =
173 granularity
174 {
175 v.to_lowercase()
176 } else if let ColumnarValue::Scalar(ScalarValue::Utf8View(Some(v))) = granularity
177 {
178 v.to_lowercase()
179 } else {
180 return exec_err!("Granularity of `date_trunc` must be non-null scalar Utf8");
181 };
182
183 fn process_array<T: ArrowTimestampType>(
184 array: &dyn Array,
185 granularity: String,
186 tz_opt: &Option<Arc<str>>,
187 ) -> Result<ColumnarValue> {
188 let parsed_tz = parse_tz(tz_opt)?;
189 let array = as_primitive_array::<T>(array)?;
190
191 if matches!(
193 granularity.as_str(),
194 "second" | "minute" | "millisecond" | "microsecond"
198 ) ||
199 (parsed_tz.is_none() && matches!(granularity.as_str(), "hour" | "day"))
201 {
202 let result = general_date_trunc_array_fine_granularity(
203 T::UNIT,
204 array,
205 granularity.as_str(),
206 )?;
207 return Ok(ColumnarValue::Array(result));
208 }
209
210 let array: PrimitiveArray<T> = array
211 .try_unary(|x| {
212 general_date_trunc(T::UNIT, x, parsed_tz, granularity.as_str())
213 })?
214 .with_timezone_opt(tz_opt.clone());
215 Ok(ColumnarValue::Array(Arc::new(array)))
216 }
217
218 fn process_scalar<T: ArrowTimestampType>(
219 v: &Option<i64>,
220 granularity: String,
221 tz_opt: &Option<Arc<str>>,
222 ) -> Result<ColumnarValue> {
223 let parsed_tz = parse_tz(tz_opt)?;
224 let value = if let Some(v) = v {
225 Some(general_date_trunc(
226 T::UNIT,
227 *v,
228 parsed_tz,
229 granularity.as_str(),
230 )?)
231 } else {
232 None
233 };
234 let value = ScalarValue::new_timestamp::<T>(value, tz_opt.clone());
235 Ok(ColumnarValue::Scalar(value))
236 }
237
238 Ok(match array {
239 ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(v, tz_opt)) => {
240 process_scalar::<TimestampNanosecondType>(v, granularity, tz_opt)?
241 }
242 ColumnarValue::Scalar(ScalarValue::TimestampMicrosecond(v, tz_opt)) => {
243 process_scalar::<TimestampMicrosecondType>(v, granularity, tz_opt)?
244 }
245 ColumnarValue::Scalar(ScalarValue::TimestampMillisecond(v, tz_opt)) => {
246 process_scalar::<TimestampMillisecondType>(v, granularity, tz_opt)?
247 }
248 ColumnarValue::Scalar(ScalarValue::TimestampSecond(v, tz_opt)) => {
249 process_scalar::<TimestampSecondType>(v, granularity, tz_opt)?
250 }
251 ColumnarValue::Array(array) => {
252 let array_type = array.data_type();
253 if let Timestamp(unit, tz_opt) = array_type {
254 match unit {
255 Second => process_array::<TimestampSecondType>(
256 array,
257 granularity,
258 tz_opt,
259 )?,
260 Millisecond => process_array::<TimestampMillisecondType>(
261 array,
262 granularity,
263 tz_opt,
264 )?,
265 Microsecond => process_array::<TimestampMicrosecondType>(
266 array,
267 granularity,
268 tz_opt,
269 )?,
270 Nanosecond => process_array::<TimestampNanosecondType>(
271 array,
272 granularity,
273 tz_opt,
274 )?,
275 }
276 } else {
277 return exec_err!("second argument of `date_trunc` is an unsupported array type: {array_type}");
278 }
279 }
280 _ => {
281 return exec_err!(
282 "second argument of `date_trunc` must be timestamp scalar or array"
283 );
284 }
285 })
286 }
287
288 fn aliases(&self) -> &[String] {
289 &self.aliases
290 }
291
292 fn output_ordering(&self, input: &[ExprProperties]) -> Result<SortProperties> {
293 let precision = &input[0];
295 let date_value = &input[1];
296
297 if precision.sort_properties.eq(&SortProperties::Singleton) {
298 Ok(date_value.sort_properties)
299 } else {
300 Ok(SortProperties::Unordered)
301 }
302 }
303 fn documentation(&self) -> Option<&Documentation> {
304 self.doc()
305 }
306}
307
308fn _date_trunc_coarse<T>(granularity: &str, value: Option<T>) -> Result<Option<T>>
309where
310 T: Datelike + Timelike + Sub<Duration, Output = T> + Copy,
311{
312 let value = match granularity {
313 "millisecond" => value,
314 "microsecond" => value,
315 "second" => value.and_then(|d| d.with_nanosecond(0)),
316 "minute" => value
317 .and_then(|d| d.with_nanosecond(0))
318 .and_then(|d| d.with_second(0)),
319 "hour" => value
320 .and_then(|d| d.with_nanosecond(0))
321 .and_then(|d| d.with_second(0))
322 .and_then(|d| d.with_minute(0)),
323 "day" => value
324 .and_then(|d| d.with_nanosecond(0))
325 .and_then(|d| d.with_second(0))
326 .and_then(|d| d.with_minute(0))
327 .and_then(|d| d.with_hour(0)),
328 "week" => value
329 .and_then(|d| d.with_nanosecond(0))
330 .and_then(|d| d.with_second(0))
331 .and_then(|d| d.with_minute(0))
332 .and_then(|d| d.with_hour(0))
333 .map(|d| {
334 d - TimeDelta::try_seconds(60 * 60 * 24 * d.weekday() as i64).unwrap()
335 }),
336 "month" => value
337 .and_then(|d| d.with_nanosecond(0))
338 .and_then(|d| d.with_second(0))
339 .and_then(|d| d.with_minute(0))
340 .and_then(|d| d.with_hour(0))
341 .and_then(|d| d.with_day0(0)),
342 "quarter" => value
343 .and_then(|d| d.with_nanosecond(0))
344 .and_then(|d| d.with_second(0))
345 .and_then(|d| d.with_minute(0))
346 .and_then(|d| d.with_hour(0))
347 .and_then(|d| d.with_day0(0))
348 .and_then(|d| d.with_month(quarter_month(&d))),
349 "year" => value
350 .and_then(|d| d.with_nanosecond(0))
351 .and_then(|d| d.with_second(0))
352 .and_then(|d| d.with_minute(0))
353 .and_then(|d| d.with_hour(0))
354 .and_then(|d| d.with_day0(0))
355 .and_then(|d| d.with_month0(0)),
356 unsupported => {
357 return exec_err!("Unsupported date_trunc granularity: {unsupported}");
358 }
359 };
360 Ok(value)
361}
362
363fn quarter_month<T>(date: &T) -> u32
364where
365 T: Datelike,
366{
367 1 + 3 * ((date.month() - 1) / 3)
368}
369
370fn _date_trunc_coarse_with_tz(
371 granularity: &str,
372 value: Option<DateTime<Tz>>,
373) -> Result<Option<i64>> {
374 if let Some(value) = value {
375 let local = value.naive_local();
376 let truncated = _date_trunc_coarse::<NaiveDateTime>(granularity, Some(local))?;
377 let truncated = truncated.and_then(|truncated| {
378 match truncated.and_local_timezone(value.timezone()) {
379 LocalResult::None => {
380 truncated
386 .sub(TimeDelta::try_hours(3).unwrap())
387 .and_local_timezone(value.timezone())
388 .single()
389 .map(|v| v.add(TimeDelta::try_hours(3).unwrap()))
390 }
391 LocalResult::Single(datetime) => Some(datetime),
392 LocalResult::Ambiguous(datetime1, datetime2) => {
393 if datetime1.offset().fix() == value.offset().fix() {
398 Some(datetime1)
399 } else {
400 Some(datetime2)
401 }
402 }
403 }
404 });
405 Ok(truncated.and_then(|value| value.timestamp_nanos_opt()))
406 } else {
407 _date_trunc_coarse::<NaiveDateTime>(granularity, None)?;
408 Ok(None)
409 }
410}
411
412fn _date_trunc_coarse_without_tz(
413 granularity: &str,
414 value: Option<NaiveDateTime>,
415) -> Result<Option<i64>> {
416 let value = _date_trunc_coarse::<NaiveDateTime>(granularity, value)?;
417 Ok(value.and_then(|value| value.and_utc().timestamp_nanos_opt()))
418}
419
420fn date_trunc_coarse(granularity: &str, value: i64, tz: Option<Tz>) -> Result<i64> {
425 let value = match tz {
426 Some(tz) => {
427 let value = as_datetime_with_timezone::<TimestampNanosecondType>(value, tz)
430 .ok_or(DataFusionError::Execution(format!(
431 "Timestamp {value} out of range"
432 )))?;
433 _date_trunc_coarse_with_tz(granularity, Some(value))
434 }
435 None => {
436 let value = timestamp_ns_to_datetime(value).ok_or_else(|| {
438 DataFusionError::Execution(format!("Timestamp {value} out of range"))
439 })?;
440 _date_trunc_coarse_without_tz(granularity, Some(value))
441 }
442 }?;
443
444 Ok(value.unwrap())
446}
447
448fn general_date_trunc_array_fine_granularity<T: ArrowTimestampType>(
455 tu: TimeUnit,
456 array: &PrimitiveArray<T>,
457 granularity: &str,
458) -> Result<ArrayRef> {
459 let unit = match (tu, granularity) {
460 (Second, "minute") => Some(Int64Array::new_scalar(60)),
461 (Second, "hour") => Some(Int64Array::new_scalar(3600)),
462 (Second, "day") => Some(Int64Array::new_scalar(86400)),
463
464 (Millisecond, "second") => Some(Int64Array::new_scalar(1_000)),
465 (Millisecond, "minute") => Some(Int64Array::new_scalar(60_000)),
466 (Millisecond, "hour") => Some(Int64Array::new_scalar(3_600_000)),
467 (Millisecond, "day") => Some(Int64Array::new_scalar(86_400_000)),
468
469 (Microsecond, "millisecond") => Some(Int64Array::new_scalar(1_000)),
470 (Microsecond, "second") => Some(Int64Array::new_scalar(1_000_000)),
471 (Microsecond, "minute") => Some(Int64Array::new_scalar(60_000_000)),
472 (Microsecond, "hour") => Some(Int64Array::new_scalar(3_600_000_000)),
473 (Microsecond, "day") => Some(Int64Array::new_scalar(86_400_000_000)),
474
475 (Nanosecond, "microsecond") => Some(Int64Array::new_scalar(1_000)),
476 (Nanosecond, "millisecond") => Some(Int64Array::new_scalar(1_000_000)),
477 (Nanosecond, "second") => Some(Int64Array::new_scalar(1_000_000_000)),
478 (Nanosecond, "minute") => Some(Int64Array::new_scalar(60_000_000_000)),
479 (Nanosecond, "hour") => Some(Int64Array::new_scalar(3_600_000_000_000)),
480 (Nanosecond, "day") => Some(Int64Array::new_scalar(86_400_000_000_000)),
481 _ => None,
482 };
483
484 if let Some(unit) = unit {
485 let original_type = array.data_type();
486 let array = arrow::compute::cast(array, &DataType::Int64)?;
487 let array = arrow::compute::kernels::numeric::div(&array, &unit)?;
488 let array = arrow::compute::kernels::numeric::mul(&array, &unit)?;
489 let array = arrow::compute::cast(&array, original_type)?;
490 Ok(array)
491 } else {
492 Ok(Arc::new(array.clone()))
494 }
495}
496
497fn general_date_trunc(
499 tu: TimeUnit,
500 value: i64,
501 tz: Option<Tz>,
502 granularity: &str,
503) -> Result<i64, DataFusionError> {
504 let scale = match tu {
505 Second => 1_000_000_000,
506 Millisecond => 1_000_000,
507 Microsecond => 1_000,
508 Nanosecond => 1,
509 };
510
511 let nano = date_trunc_coarse(granularity, scale * value, tz)?;
513
514 let result = match tu {
515 Second => match granularity {
516 "minute" => nano / 1_000_000_000 / 60 * 60,
517 _ => nano / 1_000_000_000,
518 },
519 Millisecond => match granularity {
520 "minute" => nano / 1_000_000 / 1_000 / 60 * 1_000 * 60,
521 "second" => nano / 1_000_000 / 1_000 * 1_000,
522 _ => nano / 1_000_000,
523 },
524 Microsecond => match granularity {
525 "minute" => nano / 1_000 / 1_000_000 / 60 * 60 * 1_000_000,
526 "second" => nano / 1_000 / 1_000_000 * 1_000_000,
527 "millisecond" => nano / 1_000 / 1_000 * 1_000,
528 _ => nano / 1_000,
529 },
530 _ => match granularity {
531 "minute" => nano / 1_000_000_000 / 60 * 1_000_000_000 * 60,
532 "second" => nano / 1_000_000_000 * 1_000_000_000,
533 "millisecond" => nano / 1_000_000 * 1_000_000,
534 "microsecond" => nano / 1_000 * 1_000,
535 _ => nano,
536 },
537 };
538 Ok(result)
539}
540
541fn parse_tz(tz: &Option<Arc<str>>) -> Result<Option<Tz>> {
542 tz.as_ref()
543 .map(|tz| {
544 Tz::from_str(tz).map_err(|op| {
545 DataFusionError::Execution(format!("failed on timezone {tz}: {op:?}"))
546 })
547 })
548 .transpose()
549}
550
551#[cfg(test)]
552mod tests {
553 use std::sync::Arc;
554
555 use crate::datetime::date_trunc::{date_trunc_coarse, DateTruncFunc};
556
557 use arrow::array::cast::as_primitive_array;
558 use arrow::array::types::TimestampNanosecondType;
559 use arrow::array::{Array, TimestampNanosecondArray};
560 use arrow::compute::kernels::cast_utils::string_to_timestamp_nanos;
561 use arrow::datatypes::{DataType, Field, TimeUnit};
562 use datafusion_common::config::ConfigOptions;
563 use datafusion_common::ScalarValue;
564 use datafusion_expr::{ColumnarValue, ScalarUDFImpl};
565
566 #[test]
567 fn date_trunc_test() {
568 let cases = vec![
569 (
570 "2020-09-08T13:42:29.190855Z",
571 "second",
572 "2020-09-08T13:42:29.000000Z",
573 ),
574 (
575 "2020-09-08T13:42:29.190855Z",
576 "minute",
577 "2020-09-08T13:42:00.000000Z",
578 ),
579 (
580 "2020-09-08T13:42:29.190855Z",
581 "hour",
582 "2020-09-08T13:00:00.000000Z",
583 ),
584 (
585 "2020-09-08T13:42:29.190855Z",
586 "day",
587 "2020-09-08T00:00:00.000000Z",
588 ),
589 (
590 "2020-09-08T13:42:29.190855Z",
591 "week",
592 "2020-09-07T00:00:00.000000Z",
593 ),
594 (
595 "2020-09-08T13:42:29.190855Z",
596 "month",
597 "2020-09-01T00:00:00.000000Z",
598 ),
599 (
600 "2020-09-08T13:42:29.190855Z",
601 "year",
602 "2020-01-01T00:00:00.000000Z",
603 ),
604 (
606 "2021-01-01T13:42:29.190855Z",
607 "week",
608 "2020-12-28T00:00:00.000000Z",
609 ),
610 (
611 "2020-01-01T13:42:29.190855Z",
612 "week",
613 "2019-12-30T00:00:00.000000Z",
614 ),
615 (
617 "2020-01-01T13:42:29.190855Z",
618 "quarter",
619 "2020-01-01T00:00:00.000000Z",
620 ),
621 (
622 "2020-02-01T13:42:29.190855Z",
623 "quarter",
624 "2020-01-01T00:00:00.000000Z",
625 ),
626 (
627 "2020-03-01T13:42:29.190855Z",
628 "quarter",
629 "2020-01-01T00:00:00.000000Z",
630 ),
631 (
632 "2020-04-01T13:42:29.190855Z",
633 "quarter",
634 "2020-04-01T00:00:00.000000Z",
635 ),
636 (
637 "2020-08-01T13:42:29.190855Z",
638 "quarter",
639 "2020-07-01T00:00:00.000000Z",
640 ),
641 (
642 "2020-11-01T13:42:29.190855Z",
643 "quarter",
644 "2020-10-01T00:00:00.000000Z",
645 ),
646 (
647 "2020-12-01T13:42:29.190855Z",
648 "quarter",
649 "2020-10-01T00:00:00.000000Z",
650 ),
651 ];
652
653 cases.iter().for_each(|(original, granularity, expected)| {
654 let left = string_to_timestamp_nanos(original).unwrap();
655 let right = string_to_timestamp_nanos(expected).unwrap();
656 let result = date_trunc_coarse(granularity, left, None).unwrap();
657 assert_eq!(result, right, "{original} = {expected}");
658 });
659 }
660
661 #[test]
662 fn test_date_trunc_timezones() {
663 let cases = vec![
664 (
665 vec![
666 "2020-09-08T00:00:00Z",
667 "2020-09-08T01:00:00Z",
668 "2020-09-08T02:00:00Z",
669 "2020-09-08T03:00:00Z",
670 "2020-09-08T04:00:00Z",
671 ],
672 Some("+00".into()),
673 vec![
674 "2020-09-08T00:00:00Z",
675 "2020-09-08T00:00:00Z",
676 "2020-09-08T00:00:00Z",
677 "2020-09-08T00:00:00Z",
678 "2020-09-08T00:00:00Z",
679 ],
680 ),
681 (
682 vec![
683 "2020-09-08T00:00:00Z",
684 "2020-09-08T01:00:00Z",
685 "2020-09-08T02:00:00Z",
686 "2020-09-08T03:00:00Z",
687 "2020-09-08T04:00:00Z",
688 ],
689 None,
690 vec![
691 "2020-09-08T00:00:00Z",
692 "2020-09-08T00:00:00Z",
693 "2020-09-08T00:00:00Z",
694 "2020-09-08T00:00:00Z",
695 "2020-09-08T00:00:00Z",
696 ],
697 ),
698 (
699 vec![
700 "2020-09-08T00:00:00Z",
701 "2020-09-08T01:00:00Z",
702 "2020-09-08T02:00:00Z",
703 "2020-09-08T03:00:00Z",
704 "2020-09-08T04:00:00Z",
705 ],
706 Some("-02".into()),
707 vec![
708 "2020-09-07T02:00:00Z",
709 "2020-09-07T02:00:00Z",
710 "2020-09-08T02:00:00Z",
711 "2020-09-08T02:00:00Z",
712 "2020-09-08T02:00:00Z",
713 ],
714 ),
715 (
716 vec![
717 "2020-09-08T00:00:00+05",
718 "2020-09-08T01:00:00+05",
719 "2020-09-08T02:00:00+05",
720 "2020-09-08T03:00:00+05",
721 "2020-09-08T04:00:00+05",
722 ],
723 Some("+05".into()),
724 vec![
725 "2020-09-08T00:00:00+05",
726 "2020-09-08T00:00:00+05",
727 "2020-09-08T00:00:00+05",
728 "2020-09-08T00:00:00+05",
729 "2020-09-08T00:00:00+05",
730 ],
731 ),
732 (
733 vec![
734 "2020-09-08T00:00:00+08",
735 "2020-09-08T01:00:00+08",
736 "2020-09-08T02:00:00+08",
737 "2020-09-08T03:00:00+08",
738 "2020-09-08T04:00:00+08",
739 ],
740 Some("+08".into()),
741 vec![
742 "2020-09-08T00:00:00+08",
743 "2020-09-08T00:00:00+08",
744 "2020-09-08T00:00:00+08",
745 "2020-09-08T00:00:00+08",
746 "2020-09-08T00:00:00+08",
747 ],
748 ),
749 (
750 vec![
751 "2024-10-26T23:00:00Z",
752 "2024-10-27T00:00:00Z",
753 "2024-10-27T01:00:00Z",
754 "2024-10-27T02:00:00Z",
755 ],
756 Some("Europe/Berlin".into()),
757 vec![
758 "2024-10-27T00:00:00+02",
759 "2024-10-27T00:00:00+02",
760 "2024-10-27T00:00:00+02",
761 "2024-10-27T00:00:00+02",
762 ],
763 ),
764 (
765 vec![
766 "2018-02-18T00:00:00Z",
767 "2018-02-18T01:00:00Z",
768 "2018-02-18T02:00:00Z",
769 "2018-02-18T03:00:00Z",
770 "2018-11-04T01:00:00Z",
771 "2018-11-04T02:00:00Z",
772 "2018-11-04T03:00:00Z",
773 "2018-11-04T04:00:00Z",
774 ],
775 Some("America/Sao_Paulo".into()),
776 vec![
777 "2018-02-17T00:00:00-02",
778 "2018-02-17T00:00:00-02",
779 "2018-02-17T00:00:00-02",
780 "2018-02-18T00:00:00-03",
781 "2018-11-03T00:00:00-03",
782 "2018-11-03T00:00:00-03",
783 "2018-11-04T01:00:00-02",
784 "2018-11-04T01:00:00-02",
785 ],
786 ),
787 ];
788
789 cases.iter().for_each(|(original, tz_opt, expected)| {
790 let input = original
791 .iter()
792 .map(|s| Some(string_to_timestamp_nanos(s).unwrap()))
793 .collect::<TimestampNanosecondArray>()
794 .with_timezone_opt(tz_opt.clone());
795 let right = expected
796 .iter()
797 .map(|s| Some(string_to_timestamp_nanos(s).unwrap()))
798 .collect::<TimestampNanosecondArray>()
799 .with_timezone_opt(tz_opt.clone());
800 let batch_len = input.len();
801 let arg_fields = vec![
802 Field::new("a", DataType::Utf8, false).into(),
803 Field::new("b", input.data_type().clone(), false).into(),
804 ];
805 let args = datafusion_expr::ScalarFunctionArgs {
806 args: vec![
807 ColumnarValue::Scalar(ScalarValue::from("day")),
808 ColumnarValue::Array(Arc::new(input)),
809 ],
810 arg_fields,
811 number_rows: batch_len,
812 return_field: Field::new(
813 "f",
814 DataType::Timestamp(TimeUnit::Nanosecond, tz_opt.clone()),
815 true,
816 )
817 .into(),
818 config_options: Arc::new(ConfigOptions::default()),
819 };
820 let result = DateTruncFunc::new().invoke_with_args(args).unwrap();
821 if let ColumnarValue::Array(result) = result {
822 assert_eq!(
823 result.data_type(),
824 &DataType::Timestamp(TimeUnit::Nanosecond, tz_opt.clone())
825 );
826 let left = as_primitive_array::<TimestampNanosecondType>(&result);
827 assert_eq!(left, &right);
828 } else {
829 panic!("unexpected column type");
830 }
831 });
832 }
833
834 #[test]
835 fn test_date_trunc_hour_timezones() {
836 let cases = vec![
837 (
838 vec![
839 "2020-09-08T00:30:00Z",
840 "2020-09-08T01:30:00Z",
841 "2020-09-08T02:30:00Z",
842 "2020-09-08T03:30:00Z",
843 "2020-09-08T04:30:00Z",
844 ],
845 Some("+00".into()),
846 vec![
847 "2020-09-08T00:00:00Z",
848 "2020-09-08T01:00:00Z",
849 "2020-09-08T02:00:00Z",
850 "2020-09-08T03:00:00Z",
851 "2020-09-08T04:00:00Z",
852 ],
853 ),
854 (
855 vec![
856 "2020-09-08T00:30:00Z",
857 "2020-09-08T01:30:00Z",
858 "2020-09-08T02:30:00Z",
859 "2020-09-08T03:30:00Z",
860 "2020-09-08T04:30:00Z",
861 ],
862 None,
863 vec![
864 "2020-09-08T00:00:00Z",
865 "2020-09-08T01:00:00Z",
866 "2020-09-08T02:00:00Z",
867 "2020-09-08T03:00:00Z",
868 "2020-09-08T04:00:00Z",
869 ],
870 ),
871 (
872 vec![
873 "2020-09-08T00:30:00Z",
874 "2020-09-08T01:30:00Z",
875 "2020-09-08T02:30:00Z",
876 "2020-09-08T03:30:00Z",
877 "2020-09-08T04:30:00Z",
878 ],
879 Some("-02".into()),
880 vec![
881 "2020-09-08T00:00:00Z",
882 "2020-09-08T01:00:00Z",
883 "2020-09-08T02:00:00Z",
884 "2020-09-08T03:00:00Z",
885 "2020-09-08T04:00:00Z",
886 ],
887 ),
888 (
889 vec![
890 "2020-09-08T00:30:00+05",
891 "2020-09-08T01:30:00+05",
892 "2020-09-08T02:30:00+05",
893 "2020-09-08T03:30:00+05",
894 "2020-09-08T04:30:00+05",
895 ],
896 Some("+05".into()),
897 vec![
898 "2020-09-08T00:00:00+05",
899 "2020-09-08T01:00:00+05",
900 "2020-09-08T02:00:00+05",
901 "2020-09-08T03:00:00+05",
902 "2020-09-08T04:00:00+05",
903 ],
904 ),
905 (
906 vec![
907 "2020-09-08T00:30:00+08",
908 "2020-09-08T01:30:00+08",
909 "2020-09-08T02:30:00+08",
910 "2020-09-08T03:30:00+08",
911 "2020-09-08T04:30:00+08",
912 ],
913 Some("+08".into()),
914 vec![
915 "2020-09-08T00:00:00+08",
916 "2020-09-08T01:00:00+08",
917 "2020-09-08T02:00:00+08",
918 "2020-09-08T03:00:00+08",
919 "2020-09-08T04:00:00+08",
920 ],
921 ),
922 (
923 vec![
924 "2024-10-26T23:30:00Z",
925 "2024-10-27T00:30:00Z",
926 "2024-10-27T01:30:00Z",
927 "2024-10-27T02:30:00Z",
928 ],
929 Some("Europe/Berlin".into()),
930 vec![
931 "2024-10-27T01:00:00+02",
932 "2024-10-27T02:00:00+02",
933 "2024-10-27T02:00:00+01",
934 "2024-10-27T03:00:00+01",
935 ],
936 ),
937 (
938 vec![
939 "2018-02-18T00:30:00Z",
940 "2018-02-18T01:30:00Z",
941 "2018-02-18T02:30:00Z",
942 "2018-02-18T03:30:00Z",
943 "2018-11-04T01:00:00Z",
944 "2018-11-04T02:00:00Z",
945 "2018-11-04T03:00:00Z",
946 "2018-11-04T04:00:00Z",
947 ],
948 Some("America/Sao_Paulo".into()),
949 vec![
950 "2018-02-17T22:00:00-02",
951 "2018-02-17T23:00:00-02",
952 "2018-02-17T23:00:00-03",
953 "2018-02-18T00:00:00-03",
954 "2018-11-03T22:00:00-03",
955 "2018-11-03T23:00:00-03",
956 "2018-11-04T01:00:00-02",
957 "2018-11-04T02:00:00-02",
958 ],
959 ),
960 (
961 vec![
962 "2024-10-26T23:30:00Z",
963 "2024-10-27T00:30:00Z",
964 "2024-10-27T01:30:00Z",
965 "2024-10-27T02:30:00Z",
966 ],
967 Some("Asia/Kathmandu".into()), vec![
969 "2024-10-27T05:00:00+05:45",
970 "2024-10-27T06:00:00+05:45",
971 "2024-10-27T07:00:00+05:45",
972 "2024-10-27T08:00:00+05:45",
973 ],
974 ),
975 ];
976
977 cases.iter().for_each(|(original, tz_opt, expected)| {
978 let input = original
979 .iter()
980 .map(|s| Some(string_to_timestamp_nanos(s).unwrap()))
981 .collect::<TimestampNanosecondArray>()
982 .with_timezone_opt(tz_opt.clone());
983 let right = expected
984 .iter()
985 .map(|s| Some(string_to_timestamp_nanos(s).unwrap()))
986 .collect::<TimestampNanosecondArray>()
987 .with_timezone_opt(tz_opt.clone());
988 let batch_len = input.len();
989 let arg_fields = vec![
990 Field::new("a", DataType::Utf8, false).into(),
991 Field::new("b", input.data_type().clone(), false).into(),
992 ];
993 let args = datafusion_expr::ScalarFunctionArgs {
994 args: vec![
995 ColumnarValue::Scalar(ScalarValue::from("hour")),
996 ColumnarValue::Array(Arc::new(input)),
997 ],
998 arg_fields,
999 number_rows: batch_len,
1000 return_field: Field::new(
1001 "f",
1002 DataType::Timestamp(TimeUnit::Nanosecond, tz_opt.clone()),
1003 true,
1004 )
1005 .into(),
1006 config_options: Arc::new(ConfigOptions::default()),
1007 };
1008 let result = DateTruncFunc::new().invoke_with_args(args).unwrap();
1009 if let ColumnarValue::Array(result) = result {
1010 assert_eq!(
1011 result.data_type(),
1012 &DataType::Timestamp(TimeUnit::Nanosecond, tz_opt.clone())
1013 );
1014 let left = as_primitive_array::<TimestampNanosecondType>(&result);
1015 assert_eq!(left, &right);
1016 } else {
1017 panic!("unexpected column type");
1018 }
1019 });
1020 }
1021}