Skip to main content

datafusion_functions/datetime/
date_trunc.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use std::any::Any;
19use std::num::NonZeroI64;
20use std::ops::{Add, Sub};
21use std::str::FromStr;
22use std::sync::Arc;
23
24use arrow::array::temporal_conversions::{
25    MICROSECONDS, MILLISECONDS, NANOSECONDS, as_datetime_with_timezone,
26    timestamp_ns_to_datetime,
27};
28use arrow::array::timezone::Tz;
29use arrow::array::types::{
30    ArrowTimestampType, Time32MillisecondType, Time32SecondType, Time64MicrosecondType,
31    Time64NanosecondType, TimestampMicrosecondType, TimestampMillisecondType,
32    TimestampNanosecondType, TimestampSecondType,
33};
34use arrow::array::{Array, ArrayRef, PrimitiveArray};
35use arrow::datatypes::DataType::{self, Time32, Time64, Timestamp};
36use arrow::datatypes::TimeUnit::{self, Microsecond, Millisecond, Nanosecond, Second};
37use arrow::datatypes::{Field, FieldRef};
38use datafusion_common::cast::as_primitive_array;
39use datafusion_common::types::{NativeType, logical_date, logical_string};
40use datafusion_common::{
41    DataFusionError, Result, ScalarValue, exec_datafusion_err, exec_err, internal_err,
42};
43use datafusion_expr::sort_properties::{ExprProperties, SortProperties};
44use datafusion_expr::{
45    ColumnarValue, Documentation, ReturnFieldArgs, ScalarUDFImpl, Signature,
46    TypeSignature, Volatility,
47};
48use datafusion_expr_common::signature::{Coercion, TypeSignatureClass};
49use datafusion_macros::user_doc;
50
51use chrono::{
52    DateTime, Datelike, Duration, LocalResult, NaiveDateTime, Offset, TimeDelta, Timelike,
53};
54
/// The precision a value can be truncated to by `date_trunc`.
///
/// Variants are listed from the finest to the coarsest granularity.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum DateTruncGranularity {
    /// Keep everything down to the microsecond.
    Microsecond,
    /// Keep everything down to the millisecond.
    Millisecond,
    /// Drop the sub-second part.
    Second,
    /// Drop seconds and below.
    Minute,
    /// Drop minutes and below.
    Hour,
    /// Drop the time of day.
    Day,
    /// Go back to the start of the week.
    Week,
    /// Go back to the first day of the month.
    Month,
    /// Go back to the first day of the quarter.
    Quarter,
    /// Go back to the first day of the year.
    Year,
}
69
70impl DateTruncGranularity {
71    /// List of all supported granularity values
72    /// Cannot use HashMap here as it would require lazy_static or once_cell,
73    /// Rust does not support const HashMap yet.
74    const SUPPORTED_GRANULARITIES: &[&str] = &[
75        "microsecond",
76        "millisecond",
77        "second",
78        "minute",
79        "hour",
80        "day",
81        "week",
82        "month",
83        "quarter",
84        "year",
85    ];
86
87    /// Parse a granularity string into a DateTruncGranularity enum
88    fn from_str(s: &str) -> Result<Self> {
89        // Using match for O(1) lookup - compiler optimizes this into a jump table or perfect hash
90        match s.to_lowercase().as_str() {
91            "microsecond" => Ok(Self::Microsecond),
92            "millisecond" => Ok(Self::Millisecond),
93            "second" => Ok(Self::Second),
94            "minute" => Ok(Self::Minute),
95            "hour" => Ok(Self::Hour),
96            "day" => Ok(Self::Day),
97            "week" => Ok(Self::Week),
98            "month" => Ok(Self::Month),
99            "quarter" => Ok(Self::Quarter),
100            "year" => Ok(Self::Year),
101            _ => {
102                let supported = Self::SUPPORTED_GRANULARITIES.join(", ");
103                exec_err!(
104                    "Unsupported date_trunc granularity: '{s}'. Supported values are: {supported}"
105                )
106            }
107        }
108    }
109
110    /// Returns true if this granularity can be handled with simple arithmetic
111    /// (fine granularity: second, minute, millisecond, microsecond)
112    fn is_fine_granularity(&self) -> bool {
113        matches!(
114            self,
115            Self::Second | Self::Minute | Self::Millisecond | Self::Microsecond
116        )
117    }
118
119    /// Returns true if this granularity can be handled with simple arithmetic in UTC
120    /// (hour and day in addition to fine granularities)
121    fn is_fine_granularity_utc(&self) -> bool {
122        self.is_fine_granularity() || matches!(self, Self::Hour | Self::Day)
123    }
124
125    /// Returns true if this granularity is valid for Time types
126    /// Time types don't have date components, so day/week/month/quarter/year are not valid
127    fn valid_for_time(&self) -> bool {
128        matches!(
129            self,
130            Self::Hour
131                | Self::Minute
132                | Self::Second
133                | Self::Millisecond
134                | Self::Microsecond
135        )
136    }
137}
138
// User-facing documentation for `date_trunc`; the `user_doc` attribute turns
// the entries below into the `Documentation` returned by `documentation()`.
#[user_doc(
    doc_section(label = "Time and Date Functions"),
    description = "Truncates a timestamp or time value to a specified precision.",
    syntax_example = "date_trunc(precision, expression)",
    argument(
        name = "precision",
        description = r#"Time precision to truncate to. The following precisions are supported:

    For Timestamp types:
    - year / YEAR
    - quarter / QUARTER
    - month / MONTH
    - week / WEEK
    - day / DAY
    - hour / HOUR
    - minute / MINUTE
    - second / SECOND
    - millisecond / MILLISECOND
    - microsecond / MICROSECOND

    For Time types (hour, minute, second, millisecond, microsecond only):
    - hour / HOUR
    - minute / MINUTE
    - second / SECOND
    - millisecond / MILLISECOND
    - microsecond / MICROSECOND
"#
    ),
    argument(
        name = "expression",
        description = "Timestamp or time expression to operate on. Can be a constant, column, or function."
    )
)]
// The `date_trunc` scalar function (also registered under its aliases).
#[derive(Debug, PartialEq, Eq, Hash)]
pub struct DateTruncFunc {
    // Accepted argument shapes: (string, timestamp-like) or (string, time).
    signature: Signature,
    // Alternative names the function can be called by.
    aliases: Vec<String>,
}
177
178impl Default for DateTruncFunc {
179    fn default() -> Self {
180        Self::new()
181    }
182}
183
184impl DateTruncFunc {
185    pub fn new() -> Self {
186        Self {
187            signature: Signature::one_of(
188                vec![
189                    TypeSignature::Coercible(vec![
190                        Coercion::new_exact(TypeSignatureClass::Native(logical_string())),
191                        Coercion::new_implicit(
192                            TypeSignatureClass::Timestamp,
193                            // Allow implicit cast from string and date to timestamp for backward compatibility
194                            vec![
195                                TypeSignatureClass::Native(logical_string()),
196                                TypeSignatureClass::Native(logical_date()),
197                            ],
198                            NativeType::Timestamp(Nanosecond, None),
199                        ),
200                    ]),
201                    TypeSignature::Coercible(vec![
202                        Coercion::new_exact(TypeSignatureClass::Native(logical_string())),
203                        Coercion::new_exact(TypeSignatureClass::Time),
204                    ]),
205                ],
206                Volatility::Immutable,
207            ),
208            aliases: vec![String::from("datetrunc")],
209        }
210    }
211}
212
213impl ScalarUDFImpl for DateTruncFunc {
214    fn as_any(&self) -> &dyn Any {
215        self
216    }
217
218    fn name(&self) -> &str {
219        "date_trunc"
220    }
221
222    fn signature(&self) -> &Signature {
223        &self.signature
224    }
225
226    fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
227        internal_err!("return_field_from_args should be called instead")
228    }
229
230    fn return_field_from_args(&self, args: ReturnFieldArgs) -> Result<FieldRef> {
231        let field = &args.arg_fields[1];
232        let return_type = if field.data_type().is_null() {
233            Timestamp(Nanosecond, None)
234        } else {
235            field.data_type().clone()
236        };
237        Ok(Arc::new(Field::new(
238            self.name(),
239            return_type,
240            field.is_nullable(),
241        )))
242    }
243
244    fn invoke_with_args(
245        &self,
246        args: datafusion_expr::ScalarFunctionArgs,
247    ) -> Result<ColumnarValue> {
248        let args = args.args;
249        let (granularity, array) = (&args[0], &args[1]);
250
251        let granularity_str = if let ColumnarValue::Scalar(ScalarValue::Utf8(Some(v))) =
252            granularity
253        {
254            v.to_lowercase()
255        } else if let ColumnarValue::Scalar(ScalarValue::Utf8View(Some(v))) = granularity
256        {
257            v.to_lowercase()
258        } else if let ColumnarValue::Scalar(ScalarValue::LargeUtf8(Some(v))) = granularity
259        {
260            v.to_lowercase()
261        } else {
262            return exec_err!("Granularity of `date_trunc` must be non-null scalar Utf8");
263        };
264
265        let granularity = DateTruncGranularity::from_str(&granularity_str)?;
266
267        // Check upfront if granularity is valid for Time types
268        let is_time_type = matches!(array.data_type(), Time64(_) | Time32(_));
269        if is_time_type && !granularity.valid_for_time() {
270            return exec_err!(
271                "date_trunc does not support '{}' granularity for Time types. Valid values are: hour, minute, second, millisecond, microsecond",
272                granularity_str
273            );
274        }
275
276        fn process_array<T: ArrowTimestampType>(
277            array: &dyn Array,
278            granularity: DateTruncGranularity,
279            tz_opt: &Option<Arc<str>>,
280        ) -> Result<ColumnarValue> {
281            let parsed_tz = parse_tz(tz_opt)?;
282            let array = as_primitive_array::<T>(array)?;
283
284            // fast path for fine granularity
285            // For modern timezones, it's correct to truncate "minute" in this way.
286            // Both datafusion and arrow are ignoring historical timezone's non-minute granularity
287            // bias (e.g., Asia/Kathmandu before 1919 is UTC+05:41:16).
288            // In UTC, "hour" and "day" have uniform durations and can be truncated with simple arithmetic
289            if granularity.is_fine_granularity()
290                || (parsed_tz.is_none() && granularity.is_fine_granularity_utc())
291            {
292                let result = general_date_trunc_array_fine_granularity(
293                    T::UNIT,
294                    array,
295                    granularity,
296                    tz_opt.clone(),
297                )?;
298                return Ok(ColumnarValue::Array(result));
299            }
300
301            let array: PrimitiveArray<T> = array
302                .try_unary(|x| general_date_trunc(T::UNIT, x, parsed_tz, granularity))?
303                .with_timezone_opt(tz_opt.clone());
304            Ok(ColumnarValue::Array(Arc::new(array)))
305        }
306
307        fn process_scalar<T: ArrowTimestampType>(
308            v: &Option<i64>,
309            granularity: DateTruncGranularity,
310            tz_opt: &Option<Arc<str>>,
311        ) -> Result<ColumnarValue> {
312            let parsed_tz = parse_tz(tz_opt)?;
313            let value = if let Some(v) = v {
314                Some(general_date_trunc(T::UNIT, *v, parsed_tz, granularity)?)
315            } else {
316                None
317            };
318            let value = ScalarValue::new_timestamp::<T>(value, tz_opt.clone());
319            Ok(ColumnarValue::Scalar(value))
320        }
321
322        Ok(match array {
323            ColumnarValue::Scalar(ScalarValue::Null) => {
324                // NULL input returns NULL timestamp
325                ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(None, None))
326            }
327            ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(v, tz_opt)) => {
328                process_scalar::<TimestampNanosecondType>(v, granularity, tz_opt)?
329            }
330            ColumnarValue::Scalar(ScalarValue::TimestampMicrosecond(v, tz_opt)) => {
331                process_scalar::<TimestampMicrosecondType>(v, granularity, tz_opt)?
332            }
333            ColumnarValue::Scalar(ScalarValue::TimestampMillisecond(v, tz_opt)) => {
334                process_scalar::<TimestampMillisecondType>(v, granularity, tz_opt)?
335            }
336            ColumnarValue::Scalar(ScalarValue::TimestampSecond(v, tz_opt)) => {
337                process_scalar::<TimestampSecondType>(v, granularity, tz_opt)?
338            }
339            ColumnarValue::Scalar(ScalarValue::Time64Nanosecond(v)) => {
340                let truncated = v.map(|val| truncate_time_nanos(val, granularity));
341                ColumnarValue::Scalar(ScalarValue::Time64Nanosecond(truncated))
342            }
343            ColumnarValue::Scalar(ScalarValue::Time64Microsecond(v)) => {
344                let truncated = v.map(|val| truncate_time_micros(val, granularity));
345                ColumnarValue::Scalar(ScalarValue::Time64Microsecond(truncated))
346            }
347            ColumnarValue::Scalar(ScalarValue::Time32Millisecond(v)) => {
348                let truncated = v.map(|val| truncate_time_millis(val, granularity));
349                ColumnarValue::Scalar(ScalarValue::Time32Millisecond(truncated))
350            }
351            ColumnarValue::Scalar(ScalarValue::Time32Second(v)) => {
352                let truncated = v.map(|val| truncate_time_secs(val, granularity));
353                ColumnarValue::Scalar(ScalarValue::Time32Second(truncated))
354            }
355            ColumnarValue::Array(array) => {
356                let array_type = array.data_type();
357                match array_type {
358                    Timestamp(Second, tz_opt) => {
359                        process_array::<TimestampSecondType>(array, granularity, tz_opt)?
360                    }
361                    Timestamp(Millisecond, tz_opt) => process_array::<
362                        TimestampMillisecondType,
363                    >(
364                        array, granularity, tz_opt
365                    )?,
366                    Timestamp(Microsecond, tz_opt) => process_array::<
367                        TimestampMicrosecondType,
368                    >(
369                        array, granularity, tz_opt
370                    )?,
371                    Timestamp(Nanosecond, tz_opt) => process_array::<
372                        TimestampNanosecondType,
373                    >(
374                        array, granularity, tz_opt
375                    )?,
376                    Time64(Nanosecond) => {
377                        let arr = as_primitive_array::<Time64NanosecondType>(array)?;
378                        let result: PrimitiveArray<Time64NanosecondType> =
379                            arr.unary(|v| truncate_time_nanos(v, granularity));
380                        ColumnarValue::Array(Arc::new(result))
381                    }
382                    Time64(Microsecond) => {
383                        let arr = as_primitive_array::<Time64MicrosecondType>(array)?;
384                        let result: PrimitiveArray<Time64MicrosecondType> =
385                            arr.unary(|v| truncate_time_micros(v, granularity));
386                        ColumnarValue::Array(Arc::new(result))
387                    }
388                    Time32(Millisecond) => {
389                        let arr = as_primitive_array::<Time32MillisecondType>(array)?;
390                        let result: PrimitiveArray<Time32MillisecondType> =
391                            arr.unary(|v| truncate_time_millis(v, granularity));
392                        ColumnarValue::Array(Arc::new(result))
393                    }
394                    Time32(Second) => {
395                        let arr = as_primitive_array::<Time32SecondType>(array)?;
396                        let result: PrimitiveArray<Time32SecondType> =
397                            arr.unary(|v| truncate_time_secs(v, granularity));
398                        ColumnarValue::Array(Arc::new(result))
399                    }
400                    _ => {
401                        return exec_err!(
402                            "second argument of `date_trunc` is an unsupported array type: {array_type}"
403                        );
404                    }
405                }
406            }
407            _ => {
408                return exec_err!(
409                    "second argument of `date_trunc` must be timestamp, time scalar or array"
410                );
411            }
412        })
413    }
414
415    fn aliases(&self) -> &[String] {
416        &self.aliases
417    }
418
419    fn output_ordering(&self, input: &[ExprProperties]) -> Result<SortProperties> {
420        // The DATE_TRUNC function preserves the order of its second argument.
421        let precision = &input[0];
422        let date_value = &input[1];
423
424        if precision.sort_properties.eq(&SortProperties::Singleton) {
425            Ok(date_value.sort_properties)
426        } else {
427            Ok(SortProperties::Unordered)
428        }
429    }
430    fn documentation(&self) -> Option<&Documentation> {
431        self.doc()
432    }
433}
434
435const NANOS_PER_MICROSECOND: i64 = NANOSECONDS / MICROSECONDS;
436const NANOS_PER_MILLISECOND: i64 = NANOSECONDS / MILLISECONDS;
437const NANOS_PER_SECOND: i64 = NANOSECONDS;
438const NANOS_PER_MINUTE: i64 = 60 * NANOS_PER_SECOND;
439const NANOS_PER_HOUR: i64 = 60 * NANOS_PER_MINUTE;
440
441const MICROS_PER_MILLISECOND: i64 = MICROSECONDS / MILLISECONDS;
442const MICROS_PER_SECOND: i64 = MICROSECONDS;
443const MICROS_PER_MINUTE: i64 = 60 * MICROS_PER_SECOND;
444const MICROS_PER_HOUR: i64 = 60 * MICROS_PER_MINUTE;
445
446const MILLIS_PER_SECOND: i32 = MILLISECONDS as i32;
447const MILLIS_PER_MINUTE: i32 = 60 * MILLIS_PER_SECOND;
448const MILLIS_PER_HOUR: i32 = 60 * MILLIS_PER_MINUTE;
449
450const SECS_PER_MINUTE: i32 = 60;
451const SECS_PER_HOUR: i32 = 60 * SECS_PER_MINUTE;
452
453/// Truncate time in nanoseconds to the specified granularity
454fn truncate_time_nanos(value: i64, granularity: DateTruncGranularity) -> i64 {
455    match granularity {
456        DateTruncGranularity::Hour => value - (value % NANOS_PER_HOUR),
457        DateTruncGranularity::Minute => value - (value % NANOS_PER_MINUTE),
458        DateTruncGranularity::Second => value - (value % NANOS_PER_SECOND),
459        DateTruncGranularity::Millisecond => value - (value % NANOS_PER_MILLISECOND),
460        DateTruncGranularity::Microsecond => value - (value % NANOS_PER_MICROSECOND),
461        // Other granularities are not valid for time - should be caught earlier
462        _ => value,
463    }
464}
465
466/// Truncate time in microseconds to the specified granularity
467fn truncate_time_micros(value: i64, granularity: DateTruncGranularity) -> i64 {
468    match granularity {
469        DateTruncGranularity::Hour => value - (value % MICROS_PER_HOUR),
470        DateTruncGranularity::Minute => value - (value % MICROS_PER_MINUTE),
471        DateTruncGranularity::Second => value - (value % MICROS_PER_SECOND),
472        DateTruncGranularity::Millisecond => value - (value % MICROS_PER_MILLISECOND),
473        DateTruncGranularity::Microsecond => value, // Already at microsecond precision
474        // Other granularities are not valid for time
475        _ => value,
476    }
477}
478
479/// Truncate time in milliseconds to the specified granularity
480fn truncate_time_millis(value: i32, granularity: DateTruncGranularity) -> i32 {
481    match granularity {
482        DateTruncGranularity::Hour => value - (value % MILLIS_PER_HOUR),
483        DateTruncGranularity::Minute => value - (value % MILLIS_PER_MINUTE),
484        DateTruncGranularity::Second => value - (value % MILLIS_PER_SECOND),
485        DateTruncGranularity::Millisecond => value, // Already at millisecond precision
486        DateTruncGranularity::Microsecond => value, // Can't truncate to finer precision
487        // Other granularities are not valid for time
488        _ => value,
489    }
490}
491
492/// Truncate time in seconds to the specified granularity
493fn truncate_time_secs(value: i32, granularity: DateTruncGranularity) -> i32 {
494    match granularity {
495        DateTruncGranularity::Hour => value - (value % SECS_PER_HOUR),
496        DateTruncGranularity::Minute => value - (value % SECS_PER_MINUTE),
497        DateTruncGranularity::Second => value, // Already at second precision
498        DateTruncGranularity::Millisecond => value, // Can't truncate to finer precision
499        DateTruncGranularity::Microsecond => value, // Can't truncate to finer precision
500        // Other granularities are not valid for time
501        _ => value,
502    }
503}
504
505fn _date_trunc_coarse<T>(
506    granularity: DateTruncGranularity,
507    value: Option<T>,
508) -> Result<Option<T>>
509where
510    T: Datelike + Timelike + Sub<Duration, Output = T> + Copy,
511{
512    let value = match granularity {
513        DateTruncGranularity::Millisecond => value,
514        DateTruncGranularity::Microsecond => value,
515        DateTruncGranularity::Second => value.and_then(|d| d.with_nanosecond(0)),
516        DateTruncGranularity::Minute => value
517            .and_then(|d| d.with_nanosecond(0))
518            .and_then(|d| d.with_second(0)),
519        DateTruncGranularity::Hour => value
520            .and_then(|d| d.with_nanosecond(0))
521            .and_then(|d| d.with_second(0))
522            .and_then(|d| d.with_minute(0)),
523        DateTruncGranularity::Day => value
524            .and_then(|d| d.with_nanosecond(0))
525            .and_then(|d| d.with_second(0))
526            .and_then(|d| d.with_minute(0))
527            .and_then(|d| d.with_hour(0)),
528        DateTruncGranularity::Week => value
529            .and_then(|d| d.with_nanosecond(0))
530            .and_then(|d| d.with_second(0))
531            .and_then(|d| d.with_minute(0))
532            .and_then(|d| d.with_hour(0))
533            .map(|d| {
534                d - TimeDelta::try_seconds(60 * 60 * 24 * d.weekday() as i64).unwrap()
535            }),
536        DateTruncGranularity::Month => value
537            .and_then(|d| d.with_nanosecond(0))
538            .and_then(|d| d.with_second(0))
539            .and_then(|d| d.with_minute(0))
540            .and_then(|d| d.with_hour(0))
541            .and_then(|d| d.with_day0(0)),
542        DateTruncGranularity::Quarter => value
543            .and_then(|d| d.with_nanosecond(0))
544            .and_then(|d| d.with_second(0))
545            .and_then(|d| d.with_minute(0))
546            .and_then(|d| d.with_hour(0))
547            .and_then(|d| d.with_day0(0))
548            .and_then(|d| d.with_month(quarter_month(&d))),
549        DateTruncGranularity::Year => value
550            .and_then(|d| d.with_nanosecond(0))
551            .and_then(|d| d.with_second(0))
552            .and_then(|d| d.with_minute(0))
553            .and_then(|d| d.with_hour(0))
554            .and_then(|d| d.with_day0(0))
555            .and_then(|d| d.with_month0(0)),
556    };
557    Ok(value)
558}
559
560fn quarter_month<T>(date: &T) -> u32
561where
562    T: Datelike,
563{
564    1 + 3 * ((date.month() - 1) / 3)
565}
566
567fn _date_trunc_coarse_with_tz(
568    granularity: DateTruncGranularity,
569    value: Option<DateTime<Tz>>,
570) -> Result<Option<i64>> {
571    if let Some(value) = value {
572        let local = value.naive_local();
573        let truncated = _date_trunc_coarse::<NaiveDateTime>(granularity, Some(local))?;
574        let truncated = truncated.and_then(|truncated| {
575            match truncated.and_local_timezone(value.timezone()) {
576                LocalResult::None => {
577                    // This can happen if the date_trunc operation moves the time into
578                    // an hour that doesn't exist due to daylight savings. On known example where
579                    // this can happen is with historic dates in the America/Sao_Paulo time zone.
580                    // To account for this adjust the time by a few hours, convert to local time,
581                    // and then adjust the time back.
582                    truncated
583                        .sub(TimeDelta::try_hours(3).unwrap())
584                        .and_local_timezone(value.timezone())
585                        .single()
586                        .map(|v| v.add(TimeDelta::try_hours(3).unwrap()))
587                }
588                LocalResult::Single(datetime) => Some(datetime),
589                LocalResult::Ambiguous(datetime1, datetime2) => {
590                    // Because we are truncating from an equally or more specific time
591                    // the original time must have been within the ambiguous local time
592                    // period. Therefore the offset of one of these times should match the
593                    // offset of the original time.
594                    if datetime1.offset().fix() == value.offset().fix() {
595                        Some(datetime1)
596                    } else {
597                        Some(datetime2)
598                    }
599                }
600            }
601        });
602        Ok(truncated.and_then(|value| value.timestamp_nanos_opt()))
603    } else {
604        _date_trunc_coarse::<NaiveDateTime>(granularity, None)?;
605        Ok(None)
606    }
607}
608
609fn _date_trunc_coarse_without_tz(
610    granularity: DateTruncGranularity,
611    value: Option<NaiveDateTime>,
612) -> Result<Option<i64>> {
613    let value = _date_trunc_coarse::<NaiveDateTime>(granularity, value)?;
614    Ok(value.and_then(|value| value.and_utc().timestamp_nanos_opt()))
615}
616
617/// Truncates the single `value`, expressed in nanoseconds since the
618/// epoch, for granularities greater than 1 second, in taking into
619/// account that some granularities are not uniform durations of time
620/// (e.g. months are not always the same lengths, leap seconds, etc)
621fn date_trunc_coarse(
622    granularity: DateTruncGranularity,
623    value: i64,
624    tz: Option<Tz>,
625) -> Result<i64> {
626    let value = match tz {
627        Some(tz) => {
628            // Use chrono DateTime<Tz> to clear the various fields because need to clear per timezone,
629            // and NaiveDateTime (ISO 8601) has no concept of timezones
630            let value = as_datetime_with_timezone::<TimestampNanosecondType>(value, tz)
631                .ok_or(exec_datafusion_err!("Timestamp {value} out of range"))?;
632            _date_trunc_coarse_with_tz(granularity, Some(value))
633        }
634        None => {
635            // Use chrono NaiveDateTime to clear the various fields, if we don't have a timezone.
636            let value = timestamp_ns_to_datetime(value)
637                .ok_or_else(|| exec_datafusion_err!("Timestamp {value} out of range"))?;
638            _date_trunc_coarse_without_tz(granularity, Some(value))
639        }
640    }?;
641
642    // `with_x(0)` are infallible because `0` are always a valid
643    Ok(value.unwrap())
644}
645
646/// Fast path for fine granularities (hour and smaller) that can be handled
647/// with simple arithmetic operations without calendar complexity.
648///
649/// This function is timezone-agnostic and should only be used when:
650/// - No timezone is specified in the input, OR
651/// - The granularity is less than hour as hour can be affected by DST transitions in some cases
652fn general_date_trunc_array_fine_granularity<T: ArrowTimestampType>(
653    tu: TimeUnit,
654    array: &PrimitiveArray<T>,
655    granularity: DateTruncGranularity,
656    tz_opt: Option<Arc<str>>,
657) -> Result<ArrayRef> {
658    let unit = match (tu, granularity) {
659        (Second, DateTruncGranularity::Minute) => NonZeroI64::new(60),
660        (Second, DateTruncGranularity::Hour) => NonZeroI64::new(3600),
661        (Second, DateTruncGranularity::Day) => NonZeroI64::new(86400),
662
663        (Millisecond, DateTruncGranularity::Second) => NonZeroI64::new(1_000),
664        (Millisecond, DateTruncGranularity::Minute) => NonZeroI64::new(60_000),
665        (Millisecond, DateTruncGranularity::Hour) => NonZeroI64::new(3_600_000),
666        (Millisecond, DateTruncGranularity::Day) => NonZeroI64::new(86_400_000),
667
668        (Microsecond, DateTruncGranularity::Millisecond) => NonZeroI64::new(1_000),
669        (Microsecond, DateTruncGranularity::Second) => NonZeroI64::new(1_000_000),
670        (Microsecond, DateTruncGranularity::Minute) => NonZeroI64::new(60_000_000),
671        (Microsecond, DateTruncGranularity::Hour) => NonZeroI64::new(3_600_000_000),
672        (Microsecond, DateTruncGranularity::Day) => NonZeroI64::new(86_400_000_000),
673
674        (Nanosecond, DateTruncGranularity::Microsecond) => NonZeroI64::new(1_000),
675        (Nanosecond, DateTruncGranularity::Millisecond) => NonZeroI64::new(1_000_000),
676        (Nanosecond, DateTruncGranularity::Second) => NonZeroI64::new(1_000_000_000),
677        (Nanosecond, DateTruncGranularity::Minute) => NonZeroI64::new(60_000_000_000),
678        (Nanosecond, DateTruncGranularity::Hour) => NonZeroI64::new(3_600_000_000_000),
679        (Nanosecond, DateTruncGranularity::Day) => NonZeroI64::new(86_400_000_000_000),
680        _ => None,
681    };
682
683    if let Some(unit) = unit {
684        let unit = unit.get();
685        let array = PrimitiveArray::<T>::from_iter_values_with_nulls(
686            array
687                .values()
688                .iter()
689                .map(|v| *v - i64::rem_euclid(*v, unit)),
690            array.nulls().cloned(),
691        )
692        .with_timezone_opt(tz_opt);
693        Ok(Arc::new(array))
694    } else {
695        // truncate to the same or smaller unit
696        Ok(Arc::new(array.clone()))
697    }
698}
699
700// truncates a single value with the given timeunit to the specified granularity
701fn general_date_trunc(
702    tu: TimeUnit,
703    value: i64,
704    tz: Option<Tz>,
705    granularity: DateTruncGranularity,
706) -> Result<i64, DataFusionError> {
707    let scale = match tu {
708        Second => 1_000_000_000,
709        Millisecond => 1_000_000,
710        Microsecond => 1_000,
711        Nanosecond => 1,
712    };
713
714    // convert to nanoseconds
715    let nano = date_trunc_coarse(granularity, scale * value, tz)?;
716
717    let result = match tu {
718        Second => match granularity {
719            DateTruncGranularity::Minute => nano / 1_000_000_000 / 60 * 60,
720            _ => nano / 1_000_000_000,
721        },
722        Millisecond => match granularity {
723            DateTruncGranularity::Minute => nano / 1_000_000 / 1_000 / 60 * 1_000 * 60,
724            DateTruncGranularity::Second => nano / 1_000_000 / 1_000 * 1_000,
725            _ => nano / 1_000_000,
726        },
727        Microsecond => match granularity {
728            DateTruncGranularity::Minute => {
729                nano / 1_000 / 1_000_000 / 60 * 60 * 1_000_000
730            }
731            DateTruncGranularity::Second => nano / 1_000 / 1_000_000 * 1_000_000,
732            DateTruncGranularity::Millisecond => nano / 1_000 / 1_000 * 1_000,
733            _ => nano / 1_000,
734        },
735        _ => match granularity {
736            DateTruncGranularity::Minute => {
737                nano / 1_000_000_000 / 60 * 1_000_000_000 * 60
738            }
739            DateTruncGranularity::Second => nano / 1_000_000_000 * 1_000_000_000,
740            DateTruncGranularity::Millisecond => nano / 1_000_000 * 1_000_000,
741            DateTruncGranularity::Microsecond => nano / 1_000 * 1_000,
742            _ => nano,
743        },
744    };
745    Ok(result)
746}
747
748fn parse_tz(tz: &Option<Arc<str>>) -> Result<Option<Tz>> {
749    tz.as_ref()
750        .map(|tz| {
751            Tz::from_str(tz)
752                .map_err(|op| exec_datafusion_err!("failed on timezone {tz}: {op:?}"))
753        })
754        .transpose()
755}
756
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use crate::datetime::date_trunc::{
        DateTruncFunc, DateTruncGranularity, date_trunc_coarse,
    };

    use arrow::array::cast::as_primitive_array;
    use arrow::array::types::TimestampNanosecondType;
    use arrow::array::{Array, TimestampNanosecondArray};
    use arrow::compute::kernels::cast_utils::string_to_timestamp_nanos;
    use arrow::datatypes::{DataType, Field, TimeUnit};
    use datafusion_common::ScalarValue;
    use datafusion_common::config::ConfigOptions;
    use datafusion_expr::{ColumnarValue, ScalarUDFImpl};

    /// Parses each string as a timestamp and collects the results into a
    /// nanosecond timestamp array tagged with the optional timezone `tz_opt`.
    fn timestamp_array(
        values: &[&str],
        tz_opt: &Option<Arc<str>>,
    ) -> TimestampNanosecondArray {
        values
            .iter()
            .map(|s| Some(string_to_timestamp_nanos(s).unwrap()))
            .collect::<TimestampNanosecondArray>()
            .with_timezone_opt(tz_opt.clone())
    }

    /// Invokes `date_trunc(granularity, original)` through [`DateTruncFunc`] on
    /// a nanosecond timestamp array with timezone `tz_opt`, and asserts that
    /// both the resulting data type and the resulting values match `expected`.
    fn assert_date_trunc_array(
        granularity: &str,
        original: &[&str],
        tz_opt: &Option<Arc<str>>,
        expected: &[&str],
    ) {
        let input = timestamp_array(original, tz_opt);
        let right = timestamp_array(expected, tz_opt);
        let batch_len = input.len();
        let arg_fields = vec![
            Field::new("a", DataType::Utf8, false).into(),
            Field::new("b", input.data_type().clone(), false).into(),
        ];
        let args = datafusion_expr::ScalarFunctionArgs {
            args: vec![
                ColumnarValue::Scalar(ScalarValue::from(granularity)),
                ColumnarValue::Array(Arc::new(input)),
            ],
            arg_fields,
            number_rows: batch_len,
            return_field: Field::new(
                "f",
                DataType::Timestamp(TimeUnit::Nanosecond, tz_opt.clone()),
                true,
            )
            .into(),
            config_options: Arc::new(ConfigOptions::default()),
        };
        let result = DateTruncFunc::new().invoke_with_args(args).unwrap();
        let ColumnarValue::Array(result) = result else {
            panic!("unexpected column type");
        };
        assert_eq!(
            result.data_type(),
            &DataType::Timestamp(TimeUnit::Nanosecond, tz_opt.clone()),
            "Failed for granularity: {granularity}, timezone: {tz_opt:?}"
        );
        let left = as_primitive_array::<TimestampNanosecondType>(&result);
        assert_eq!(
            left, &right,
            "Failed for granularity: {granularity}, timezone: {tz_opt:?}"
        );
    }

    /// Checks `date_trunc_coarse` directly on scalar nanosecond values without
    /// a timezone, for each `(input, granularity, expected)` triple.
    #[test]
    fn date_trunc_test() {
        let cases = vec![
            (
                "2020-09-08T13:42:29.190855Z",
                "second",
                "2020-09-08T13:42:29.000000Z",
            ),
            (
                "2020-09-08T13:42:29.190855Z",
                "minute",
                "2020-09-08T13:42:00.000000Z",
            ),
            (
                "2020-09-08T13:42:29.190855Z",
                "hour",
                "2020-09-08T13:00:00.000000Z",
            ),
            (
                "2020-09-08T13:42:29.190855Z",
                "day",
                "2020-09-08T00:00:00.000000Z",
            ),
            (
                "2020-09-08T13:42:29.190855Z",
                "week",
                "2020-09-07T00:00:00.000000Z",
            ),
            (
                "2020-09-08T13:42:29.190855Z",
                "month",
                "2020-09-01T00:00:00.000000Z",
            ),
            (
                "2020-09-08T13:42:29.190855Z",
                "year",
                "2020-01-01T00:00:00.000000Z",
            ),
            // week
            (
                "2021-01-01T13:42:29.190855Z",
                "week",
                "2020-12-28T00:00:00.000000Z",
            ),
            (
                "2020-01-01T13:42:29.190855Z",
                "week",
                "2019-12-30T00:00:00.000000Z",
            ),
            // quarter
            (
                "2020-01-01T13:42:29.190855Z",
                "quarter",
                "2020-01-01T00:00:00.000000Z",
            ),
            (
                "2020-02-01T13:42:29.190855Z",
                "quarter",
                "2020-01-01T00:00:00.000000Z",
            ),
            (
                "2020-03-01T13:42:29.190855Z",
                "quarter",
                "2020-01-01T00:00:00.000000Z",
            ),
            (
                "2020-04-01T13:42:29.190855Z",
                "quarter",
                "2020-04-01T00:00:00.000000Z",
            ),
            (
                "2020-08-01T13:42:29.190855Z",
                "quarter",
                "2020-07-01T00:00:00.000000Z",
            ),
            (
                "2020-11-01T13:42:29.190855Z",
                "quarter",
                "2020-10-01T00:00:00.000000Z",
            ),
            (
                "2020-12-01T13:42:29.190855Z",
                "quarter",
                "2020-10-01T00:00:00.000000Z",
            ),
        ];

        for (original, granularity, expected) in &cases {
            let left = string_to_timestamp_nanos(original).unwrap();
            let right = string_to_timestamp_nanos(expected).unwrap();
            let granularity_enum = DateTruncGranularity::from_str(granularity).unwrap();
            let result = date_trunc_coarse(granularity_enum, left, None).unwrap();
            assert_eq!(result, right, "{original} = {expected}");
        }
    }

    /// Truncation to "day" must respect the array's timezone, including
    /// fixed offsets and named zones with daylight-saving transitions.
    #[test]
    fn test_date_trunc_timezones() {
        let cases = [
            (
                vec![
                    "2020-09-08T00:00:00Z",
                    "2020-09-08T01:00:00Z",
                    "2020-09-08T02:00:00Z",
                    "2020-09-08T03:00:00Z",
                    "2020-09-08T04:00:00Z",
                ],
                Some("+00".into()),
                vec![
                    "2020-09-08T00:00:00Z",
                    "2020-09-08T00:00:00Z",
                    "2020-09-08T00:00:00Z",
                    "2020-09-08T00:00:00Z",
                    "2020-09-08T00:00:00Z",
                ],
            ),
            (
                vec![
                    "2020-09-08T00:00:00Z",
                    "2020-09-08T01:00:00Z",
                    "2020-09-08T02:00:00Z",
                    "2020-09-08T03:00:00Z",
                    "2020-09-08T04:00:00Z",
                ],
                None,
                vec![
                    "2020-09-08T00:00:00Z",
                    "2020-09-08T00:00:00Z",
                    "2020-09-08T00:00:00Z",
                    "2020-09-08T00:00:00Z",
                    "2020-09-08T00:00:00Z",
                ],
            ),
            (
                vec![
                    "2020-09-08T00:00:00Z",
                    "2020-09-08T01:00:00Z",
                    "2020-09-08T02:00:00Z",
                    "2020-09-08T03:00:00Z",
                    "2020-09-08T04:00:00Z",
                ],
                Some("-02".into()),
                vec![
                    "2020-09-07T02:00:00Z",
                    "2020-09-07T02:00:00Z",
                    "2020-09-08T02:00:00Z",
                    "2020-09-08T02:00:00Z",
                    "2020-09-08T02:00:00Z",
                ],
            ),
            (
                vec![
                    "2020-09-08T00:00:00+05",
                    "2020-09-08T01:00:00+05",
                    "2020-09-08T02:00:00+05",
                    "2020-09-08T03:00:00+05",
                    "2020-09-08T04:00:00+05",
                ],
                Some("+05".into()),
                vec![
                    "2020-09-08T00:00:00+05",
                    "2020-09-08T00:00:00+05",
                    "2020-09-08T00:00:00+05",
                    "2020-09-08T00:00:00+05",
                    "2020-09-08T00:00:00+05",
                ],
            ),
            (
                vec![
                    "2020-09-08T00:00:00+08",
                    "2020-09-08T01:00:00+08",
                    "2020-09-08T02:00:00+08",
                    "2020-09-08T03:00:00+08",
                    "2020-09-08T04:00:00+08",
                ],
                Some("+08".into()),
                vec![
                    "2020-09-08T00:00:00+08",
                    "2020-09-08T00:00:00+08",
                    "2020-09-08T00:00:00+08",
                    "2020-09-08T00:00:00+08",
                    "2020-09-08T00:00:00+08",
                ],
            ),
            (
                vec![
                    "2024-10-26T23:00:00Z",
                    "2024-10-27T00:00:00Z",
                    "2024-10-27T01:00:00Z",
                    "2024-10-27T02:00:00Z",
                ],
                Some("Europe/Berlin".into()),
                vec![
                    "2024-10-27T00:00:00+02",
                    "2024-10-27T00:00:00+02",
                    "2024-10-27T00:00:00+02",
                    "2024-10-27T00:00:00+02",
                ],
            ),
            (
                vec![
                    "2018-02-18T00:00:00Z",
                    "2018-02-18T01:00:00Z",
                    "2018-02-18T02:00:00Z",
                    "2018-02-18T03:00:00Z",
                    "2018-11-04T01:00:00Z",
                    "2018-11-04T02:00:00Z",
                    "2018-11-04T03:00:00Z",
                    "2018-11-04T04:00:00Z",
                ],
                Some("America/Sao_Paulo".into()),
                vec![
                    "2018-02-17T00:00:00-02",
                    "2018-02-17T00:00:00-02",
                    "2018-02-17T00:00:00-02",
                    "2018-02-18T00:00:00-03",
                    "2018-11-03T00:00:00-03",
                    "2018-11-03T00:00:00-03",
                    "2018-11-04T01:00:00-02",
                    "2018-11-04T01:00:00-02",
                ],
            ),
        ];

        for (original, tz_opt, expected) in &cases {
            assert_date_trunc_array("day", original, tz_opt, expected);
        }
    }

    /// Truncation to "hour" must respect the array's timezone, including
    /// daylight-saving transitions and zones with a non-whole-hour offset.
    #[test]
    fn test_date_trunc_hour_timezones() {
        let cases = [
            (
                vec![
                    "2020-09-08T00:30:00Z",
                    "2020-09-08T01:30:00Z",
                    "2020-09-08T02:30:00Z",
                    "2020-09-08T03:30:00Z",
                    "2020-09-08T04:30:00Z",
                ],
                Some("+00".into()),
                vec![
                    "2020-09-08T00:00:00Z",
                    "2020-09-08T01:00:00Z",
                    "2020-09-08T02:00:00Z",
                    "2020-09-08T03:00:00Z",
                    "2020-09-08T04:00:00Z",
                ],
            ),
            (
                vec![
                    "2020-09-08T00:30:00Z",
                    "2020-09-08T01:30:00Z",
                    "2020-09-08T02:30:00Z",
                    "2020-09-08T03:30:00Z",
                    "2020-09-08T04:30:00Z",
                ],
                None,
                vec![
                    "2020-09-08T00:00:00Z",
                    "2020-09-08T01:00:00Z",
                    "2020-09-08T02:00:00Z",
                    "2020-09-08T03:00:00Z",
                    "2020-09-08T04:00:00Z",
                ],
            ),
            (
                vec![
                    "2020-09-08T00:30:00Z",
                    "2020-09-08T01:30:00Z",
                    "2020-09-08T02:30:00Z",
                    "2020-09-08T03:30:00Z",
                    "2020-09-08T04:30:00Z",
                ],
                Some("-02".into()),
                vec![
                    "2020-09-08T00:00:00Z",
                    "2020-09-08T01:00:00Z",
                    "2020-09-08T02:00:00Z",
                    "2020-09-08T03:00:00Z",
                    "2020-09-08T04:00:00Z",
                ],
            ),
            (
                vec![
                    "2020-09-08T00:30:00+05",
                    "2020-09-08T01:30:00+05",
                    "2020-09-08T02:30:00+05",
                    "2020-09-08T03:30:00+05",
                    "2020-09-08T04:30:00+05",
                ],
                Some("+05".into()),
                vec![
                    "2020-09-08T00:00:00+05",
                    "2020-09-08T01:00:00+05",
                    "2020-09-08T02:00:00+05",
                    "2020-09-08T03:00:00+05",
                    "2020-09-08T04:00:00+05",
                ],
            ),
            (
                vec![
                    "2020-09-08T00:30:00+08",
                    "2020-09-08T01:30:00+08",
                    "2020-09-08T02:30:00+08",
                    "2020-09-08T03:30:00+08",
                    "2020-09-08T04:30:00+08",
                ],
                Some("+08".into()),
                vec![
                    "2020-09-08T00:00:00+08",
                    "2020-09-08T01:00:00+08",
                    "2020-09-08T02:00:00+08",
                    "2020-09-08T03:00:00+08",
                    "2020-09-08T04:00:00+08",
                ],
            ),
            (
                vec![
                    "2024-10-26T23:30:00Z",
                    "2024-10-27T00:30:00Z",
                    "2024-10-27T01:30:00Z",
                    "2024-10-27T02:30:00Z",
                ],
                Some("Europe/Berlin".into()),
                vec![
                    "2024-10-27T01:00:00+02",
                    "2024-10-27T02:00:00+02",
                    "2024-10-27T02:00:00+01",
                    "2024-10-27T03:00:00+01",
                ],
            ),
            (
                vec![
                    "2018-02-18T00:30:00Z",
                    "2018-02-18T01:30:00Z",
                    "2018-02-18T02:30:00Z",
                    "2018-02-18T03:30:00Z",
                    "2018-11-04T01:00:00Z",
                    "2018-11-04T02:00:00Z",
                    "2018-11-04T03:00:00Z",
                    "2018-11-04T04:00:00Z",
                ],
                Some("America/Sao_Paulo".into()),
                vec![
                    "2018-02-17T22:00:00-02",
                    "2018-02-17T23:00:00-02",
                    "2018-02-17T23:00:00-03",
                    "2018-02-18T00:00:00-03",
                    "2018-11-03T22:00:00-03",
                    "2018-11-03T23:00:00-03",
                    "2018-11-04T01:00:00-02",
                    "2018-11-04T02:00:00-02",
                ],
            ),
            (
                vec![
                    "2024-10-26T23:30:00Z",
                    "2024-10-27T00:30:00Z",
                    "2024-10-27T01:30:00Z",
                    "2024-10-27T02:30:00Z",
                ],
                Some("Asia/Kathmandu".into()), // UTC+5:45
                vec![
                    "2024-10-27T05:00:00+05:45",
                    "2024-10-27T06:00:00+05:45",
                    "2024-10-27T07:00:00+05:45",
                    "2024-10-27T08:00:00+05:45",
                ],
            ),
        ];

        for (original, tz_opt, expected) in &cases {
            assert_date_trunc_array("hour", original, tz_opt, expected);
        }
    }

    /// Sub-hour granularities ("second", "minute", "millisecond") must produce
    /// the same instants regardless of the timezone attached to the array.
    #[test]
    fn test_date_trunc_fine_granularity_timezones() {
        let cases = [
            // Test "second" granularity
            (
                vec![
                    "2020-09-08T13:42:29.190855Z",
                    "2020-09-08T13:42:30.500000Z",
                    "2020-09-08T13:42:31.999999Z",
                ],
                Some("+00".into()),
                "second",
                vec![
                    "2020-09-08T13:42:29.000000Z",
                    "2020-09-08T13:42:30.000000Z",
                    "2020-09-08T13:42:31.000000Z",
                ],
            ),
            (
                vec![
                    "2020-09-08T13:42:29.190855+05",
                    "2020-09-08T13:42:30.500000+05",
                    "2020-09-08T13:42:31.999999+05",
                ],
                Some("+05".into()),
                "second",
                vec![
                    "2020-09-08T13:42:29.000000+05",
                    "2020-09-08T13:42:30.000000+05",
                    "2020-09-08T13:42:31.000000+05",
                ],
            ),
            (
                vec![
                    "2020-09-08T13:42:29.190855Z",
                    "2020-09-08T13:42:30.500000Z",
                    "2020-09-08T13:42:31.999999Z",
                ],
                Some("Europe/Berlin".into()),
                "second",
                vec![
                    "2020-09-08T13:42:29.000000Z",
                    "2020-09-08T13:42:30.000000Z",
                    "2020-09-08T13:42:31.000000Z",
                ],
            ),
            // Test "minute" granularity
            (
                vec![
                    "2020-09-08T13:42:29.190855Z",
                    "2020-09-08T13:43:30.500000Z",
                    "2020-09-08T13:44:31.999999Z",
                ],
                Some("+00".into()),
                "minute",
                vec![
                    "2020-09-08T13:42:00.000000Z",
                    "2020-09-08T13:43:00.000000Z",
                    "2020-09-08T13:44:00.000000Z",
                ],
            ),
            (
                vec![
                    "2020-09-08T13:42:29.190855+08",
                    "2020-09-08T13:43:30.500000+08",
                    "2020-09-08T13:44:31.999999+08",
                ],
                Some("+08".into()),
                "minute",
                vec![
                    "2020-09-08T13:42:00.000000+08",
                    "2020-09-08T13:43:00.000000+08",
                    "2020-09-08T13:44:00.000000+08",
                ],
            ),
            (
                vec![
                    "2020-09-08T13:42:29.190855Z",
                    "2020-09-08T13:43:30.500000Z",
                    "2020-09-08T13:44:31.999999Z",
                ],
                Some("America/Sao_Paulo".into()),
                "minute",
                vec![
                    "2020-09-08T13:42:00.000000Z",
                    "2020-09-08T13:43:00.000000Z",
                    "2020-09-08T13:44:00.000000Z",
                ],
            ),
            // Test with None (no timezone)
            (
                vec![
                    "2020-09-08T13:42:29.190855Z",
                    "2020-09-08T13:43:30.500000Z",
                    "2020-09-08T13:44:31.999999Z",
                ],
                None,
                "minute",
                vec![
                    "2020-09-08T13:42:00.000000Z",
                    "2020-09-08T13:43:00.000000Z",
                    "2020-09-08T13:44:00.000000Z",
                ],
            ),
            // Test millisecond granularity
            (
                vec![
                    "2020-09-08T13:42:29.190855Z",
                    "2020-09-08T13:42:29.191999Z",
                    "2020-09-08T13:42:29.192500Z",
                ],
                Some("Asia/Kolkata".into()),
                "millisecond",
                vec![
                    "2020-09-08T19:12:29.190000+05:30",
                    "2020-09-08T19:12:29.191000+05:30",
                    "2020-09-08T19:12:29.192000+05:30",
                ],
            ),
        ];

        for (original, tz_opt, granularity, expected) in &cases {
            assert_date_trunc_array(*granularity, original, tz_opt, expected);
        }
    }
}