Skip to main content

datafusion_functions/datetime/
date_trunc.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use std::num::NonZeroI64;
19use std::ops::{Add, Sub};
20use std::str::FromStr;
21use std::sync::Arc;
22
23use arrow::array::temporal_conversions::{
24    MICROSECONDS, MILLISECONDS, NANOSECONDS, as_datetime_with_timezone,
25    timestamp_ns_to_datetime,
26};
27use arrow::array::timezone::Tz;
28use arrow::array::types::{
29    ArrowTimestampType, Time32MillisecondType, Time32SecondType, Time64MicrosecondType,
30    Time64NanosecondType, TimestampMicrosecondType, TimestampMillisecondType,
31    TimestampNanosecondType, TimestampSecondType,
32};
33use arrow::array::{Array, ArrayRef, PrimitiveArray};
34use arrow::datatypes::DataType::{self, Time32, Time64, Timestamp};
35use arrow::datatypes::TimeUnit::{self, Microsecond, Millisecond, Nanosecond, Second};
36use arrow::datatypes::{Field, FieldRef};
37use datafusion_common::cast::as_primitive_array;
38use datafusion_common::types::{NativeType, logical_date, logical_string};
39use datafusion_common::{
40    DataFusionError, Result, ScalarValue, exec_datafusion_err, exec_err, internal_err,
41};
42use datafusion_expr::sort_properties::{ExprProperties, SortProperties};
43use datafusion_expr::{
44    ColumnarValue, Documentation, ReturnFieldArgs, ScalarFunctionArgs, ScalarUDFImpl,
45    Signature, TypeSignature, Volatility,
46};
47use datafusion_expr_common::signature::{Coercion, TypeSignatureClass};
48use datafusion_macros::user_doc;
49
50use chrono::{
51    DateTime, Datelike, Duration, LocalResult, NaiveDateTime, Offset, TimeDelta, Timelike,
52};
53
54/// Represents the granularity for date truncation operations
55#[derive(Debug, Clone, Copy, PartialEq, Eq)]
56enum DateTruncGranularity {
57    Microsecond,
58    Millisecond,
59    Second,
60    Minute,
61    Hour,
62    Day,
63    Week,
64    Month,
65    Quarter,
66    Year,
67}
68
69impl DateTruncGranularity {
70    /// List of all supported granularity values
71    /// Cannot use HashMap here as it would require lazy_static or once_cell,
72    /// Rust does not support const HashMap yet.
73    const SUPPORTED_GRANULARITIES: &[&str] = &[
74        "microsecond",
75        "millisecond",
76        "second",
77        "minute",
78        "hour",
79        "day",
80        "week",
81        "month",
82        "quarter",
83        "year",
84    ];
85
86    /// Parse a granularity string into a DateTruncGranularity enum
87    fn from_str(s: &str) -> Result<Self> {
88        // Using match for O(1) lookup - compiler optimizes this into a jump table or perfect hash
89        match s.to_lowercase().as_str() {
90            "microsecond" => Ok(Self::Microsecond),
91            "millisecond" => Ok(Self::Millisecond),
92            "second" => Ok(Self::Second),
93            "minute" => Ok(Self::Minute),
94            "hour" => Ok(Self::Hour),
95            "day" => Ok(Self::Day),
96            "week" => Ok(Self::Week),
97            "month" => Ok(Self::Month),
98            "quarter" => Ok(Self::Quarter),
99            "year" => Ok(Self::Year),
100            _ => {
101                let supported = Self::SUPPORTED_GRANULARITIES.join(", ");
102                exec_err!(
103                    "Unsupported date_trunc granularity: '{s}'. Supported values are: {supported}"
104                )
105            }
106        }
107    }
108
109    /// Returns true if this granularity can be handled with simple arithmetic
110    /// (fine granularity: second, minute, millisecond, microsecond)
111    fn is_fine_granularity(&self) -> bool {
112        matches!(
113            self,
114            Self::Second | Self::Minute | Self::Millisecond | Self::Microsecond
115        )
116    }
117
118    /// Returns true if this granularity can be handled with simple arithmetic in UTC
119    /// (hour and day in addition to fine granularities)
120    fn is_fine_granularity_utc(&self) -> bool {
121        self.is_fine_granularity() || matches!(self, Self::Hour | Self::Day)
122    }
123
124    /// Returns true if this granularity is valid for Time types
125    /// Time types don't have date components, so day/week/month/quarter/year are not valid
126    fn valid_for_time(&self) -> bool {
127        matches!(
128            self,
129            Self::Hour
130                | Self::Minute
131                | Self::Second
132                | Self::Millisecond
133                | Self::Microsecond
134        )
135    }
136}
137
138#[user_doc(
139    doc_section(label = "Time and Date Functions"),
140    description = "Truncates a timestamp or time value to a specified precision.",
141    syntax_example = "date_trunc(precision, expression)",
142    argument(
143        name = "precision",
144        description = r#"Time precision to truncate to. The following precisions are supported:
145
146    For Timestamp types:
147    - year / YEAR
148    - quarter / QUARTER
149    - month / MONTH
150    - week / WEEK
151    - day / DAY
152    - hour / HOUR
153    - minute / MINUTE
154    - second / SECOND
155    - millisecond / MILLISECOND
156    - microsecond / MICROSECOND
157
158    For Time types (hour, minute, second, millisecond, microsecond only):
159    - hour / HOUR
160    - minute / MINUTE
161    - second / SECOND
162    - millisecond / MILLISECOND
163    - microsecond / MICROSECOND
164"#
165    ),
166    argument(
167        name = "expression",
168        description = "Timestamp or time expression to operate on. Can be a constant, column, or function."
169    ),
170    sql_example = r#"```sql
171> SELECT date_trunc('month', '2024-05-15T10:30:00');
172+-----------------------------------------------+
173| date_trunc(Utf8("month"),Utf8("2024-05-15T10:30:00")) |
174+-----------------------------------------------+
175| 2024-05-01T00:00:00                           |
176+-----------------------------------------------+
177> SELECT date_trunc('hour', '2024-05-15T10:30:00');
178+----------------------------------------------+
179| date_trunc(Utf8("hour"),Utf8("2024-05-15T10:30:00")) |
180+----------------------------------------------+
181| 2024-05-15T10:00:00                          |
182+----------------------------------------------+
183```"#
184)]
185#[derive(Debug, PartialEq, Eq, Hash)]
186pub struct DateTruncFunc {
187    signature: Signature,
188    aliases: Vec<String>,
189}
190
191impl Default for DateTruncFunc {
192    fn default() -> Self {
193        Self::new()
194    }
195}
196
197impl DateTruncFunc {
198    pub fn new() -> Self {
199        Self {
200            signature: Signature::one_of(
201                vec![
202                    TypeSignature::Coercible(vec![
203                        Coercion::new_exact(TypeSignatureClass::Native(logical_string())),
204                        Coercion::new_implicit(
205                            TypeSignatureClass::Timestamp,
206                            // Allow implicit cast from string and date to timestamp for backward compatibility
207                            vec![
208                                TypeSignatureClass::Native(logical_string()),
209                                TypeSignatureClass::Native(logical_date()),
210                            ],
211                            NativeType::Timestamp(Nanosecond, None),
212                        ),
213                    ]),
214                    TypeSignature::Coercible(vec![
215                        Coercion::new_exact(TypeSignatureClass::Native(logical_string())),
216                        Coercion::new_exact(TypeSignatureClass::Time),
217                    ]),
218                ],
219                Volatility::Immutable,
220            ),
221            aliases: vec![String::from("datetrunc")],
222        }
223    }
224}
225
226impl ScalarUDFImpl for DateTruncFunc {
227    fn name(&self) -> &str {
228        "date_trunc"
229    }
230
231    fn signature(&self) -> &Signature {
232        &self.signature
233    }
234
235    fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
236        internal_err!("return_field_from_args should be called instead")
237    }
238
239    fn return_field_from_args(&self, args: ReturnFieldArgs) -> Result<FieldRef> {
240        let field = &args.arg_fields[1];
241        let return_type = if field.data_type().is_null() {
242            Timestamp(Nanosecond, None)
243        } else {
244            field.data_type().clone()
245        };
246        Ok(Arc::new(Field::new(
247            self.name(),
248            return_type,
249            field.is_nullable(),
250        )))
251    }
252
253    fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
254        let args = args.args;
255        let (granularity, array) = (&args[0], &args[1]);
256
257        let granularity_str = if let ColumnarValue::Scalar(ScalarValue::Utf8(Some(v))) =
258            granularity
259        {
260            v.to_lowercase()
261        } else if let ColumnarValue::Scalar(ScalarValue::Utf8View(Some(v))) = granularity
262        {
263            v.to_lowercase()
264        } else if let ColumnarValue::Scalar(ScalarValue::LargeUtf8(Some(v))) = granularity
265        {
266            v.to_lowercase()
267        } else {
268            return exec_err!("Granularity of `date_trunc` must be non-null scalar Utf8");
269        };
270
271        let granularity = DateTruncGranularity::from_str(&granularity_str)?;
272
273        // Check upfront if granularity is valid for Time types
274        let is_time_type = matches!(array.data_type(), Time64(_) | Time32(_));
275        if is_time_type && !granularity.valid_for_time() {
276            return exec_err!(
277                "date_trunc does not support '{}' granularity for Time types. Valid values are: hour, minute, second, millisecond, microsecond",
278                granularity_str
279            );
280        }
281
282        fn process_array<T: ArrowTimestampType>(
283            array: &dyn Array,
284            granularity: DateTruncGranularity,
285            tz_opt: &Option<Arc<str>>,
286        ) -> Result<ColumnarValue> {
287            let parsed_tz = parse_tz(tz_opt)?;
288            let array = as_primitive_array::<T>(array)?;
289
290            // fast path for fine granularity
291            // For modern timezones, it's correct to truncate "minute" in this way.
292            // Both datafusion and arrow are ignoring historical timezone's non-minute granularity
293            // bias (e.g., Asia/Kathmandu before 1919 is UTC+05:41:16).
294            // In UTC, "hour" and "day" have uniform durations and can be truncated with simple arithmetic
295            if granularity.is_fine_granularity()
296                || (parsed_tz.is_none() && granularity.is_fine_granularity_utc())
297            {
298                let result = general_date_trunc_array_fine_granularity(
299                    T::UNIT,
300                    array,
301                    granularity,
302                    tz_opt.clone(),
303                )?;
304                return Ok(ColumnarValue::Array(result));
305            }
306
307            let array: PrimitiveArray<T> = array
308                .try_unary(|x| general_date_trunc(T::UNIT, x, parsed_tz, granularity))?
309                .with_timezone_opt(tz_opt.clone());
310            Ok(ColumnarValue::Array(Arc::new(array)))
311        }
312
313        fn process_scalar<T: ArrowTimestampType>(
314            v: &Option<i64>,
315            granularity: DateTruncGranularity,
316            tz_opt: &Option<Arc<str>>,
317        ) -> Result<ColumnarValue> {
318            let parsed_tz = parse_tz(tz_opt)?;
319            let value = if let Some(v) = v {
320                Some(general_date_trunc(T::UNIT, *v, parsed_tz, granularity)?)
321            } else {
322                None
323            };
324            let value = ScalarValue::new_timestamp::<T>(value, tz_opt.clone());
325            Ok(ColumnarValue::Scalar(value))
326        }
327
328        Ok(match array {
329            ColumnarValue::Scalar(ScalarValue::Null) => {
330                // NULL input returns NULL timestamp
331                ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(None, None))
332            }
333            ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(v, tz_opt)) => {
334                process_scalar::<TimestampNanosecondType>(v, granularity, tz_opt)?
335            }
336            ColumnarValue::Scalar(ScalarValue::TimestampMicrosecond(v, tz_opt)) => {
337                process_scalar::<TimestampMicrosecondType>(v, granularity, tz_opt)?
338            }
339            ColumnarValue::Scalar(ScalarValue::TimestampMillisecond(v, tz_opt)) => {
340                process_scalar::<TimestampMillisecondType>(v, granularity, tz_opt)?
341            }
342            ColumnarValue::Scalar(ScalarValue::TimestampSecond(v, tz_opt)) => {
343                process_scalar::<TimestampSecondType>(v, granularity, tz_opt)?
344            }
345            ColumnarValue::Scalar(ScalarValue::Time64Nanosecond(v)) => {
346                let truncated = v.map(|val| truncate_time_nanos(val, granularity));
347                ColumnarValue::Scalar(ScalarValue::Time64Nanosecond(truncated))
348            }
349            ColumnarValue::Scalar(ScalarValue::Time64Microsecond(v)) => {
350                let truncated = v.map(|val| truncate_time_micros(val, granularity));
351                ColumnarValue::Scalar(ScalarValue::Time64Microsecond(truncated))
352            }
353            ColumnarValue::Scalar(ScalarValue::Time32Millisecond(v)) => {
354                let truncated = v.map(|val| truncate_time_millis(val, granularity));
355                ColumnarValue::Scalar(ScalarValue::Time32Millisecond(truncated))
356            }
357            ColumnarValue::Scalar(ScalarValue::Time32Second(v)) => {
358                let truncated = v.map(|val| truncate_time_secs(val, granularity));
359                ColumnarValue::Scalar(ScalarValue::Time32Second(truncated))
360            }
361            ColumnarValue::Array(array) => {
362                let array_type = array.data_type();
363                match array_type {
364                    Timestamp(Second, tz_opt) => {
365                        process_array::<TimestampSecondType>(array, granularity, tz_opt)?
366                    }
367                    Timestamp(Millisecond, tz_opt) => process_array::<
368                        TimestampMillisecondType,
369                    >(
370                        array, granularity, tz_opt
371                    )?,
372                    Timestamp(Microsecond, tz_opt) => process_array::<
373                        TimestampMicrosecondType,
374                    >(
375                        array, granularity, tz_opt
376                    )?,
377                    Timestamp(Nanosecond, tz_opt) => process_array::<
378                        TimestampNanosecondType,
379                    >(
380                        array, granularity, tz_opt
381                    )?,
382                    Time64(Nanosecond) => {
383                        let arr = as_primitive_array::<Time64NanosecondType>(array)?;
384                        let result: PrimitiveArray<Time64NanosecondType> =
385                            arr.unary(|v| truncate_time_nanos(v, granularity));
386                        ColumnarValue::Array(Arc::new(result))
387                    }
388                    Time64(Microsecond) => {
389                        let arr = as_primitive_array::<Time64MicrosecondType>(array)?;
390                        let result: PrimitiveArray<Time64MicrosecondType> =
391                            arr.unary(|v| truncate_time_micros(v, granularity));
392                        ColumnarValue::Array(Arc::new(result))
393                    }
394                    Time32(Millisecond) => {
395                        let arr = as_primitive_array::<Time32MillisecondType>(array)?;
396                        let result: PrimitiveArray<Time32MillisecondType> =
397                            arr.unary(|v| truncate_time_millis(v, granularity));
398                        ColumnarValue::Array(Arc::new(result))
399                    }
400                    Time32(Second) => {
401                        let arr = as_primitive_array::<Time32SecondType>(array)?;
402                        let result: PrimitiveArray<Time32SecondType> =
403                            arr.unary(|v| truncate_time_secs(v, granularity));
404                        ColumnarValue::Array(Arc::new(result))
405                    }
406                    _ => {
407                        return exec_err!(
408                            "second argument of `date_trunc` is an unsupported array type: {array_type}"
409                        );
410                    }
411                }
412            }
413            _ => {
414                return exec_err!(
415                    "second argument of `date_trunc` must be timestamp, time scalar or array"
416                );
417            }
418        })
419    }
420
421    fn aliases(&self) -> &[String] {
422        &self.aliases
423    }
424
425    fn output_ordering(&self, input: &[ExprProperties]) -> Result<SortProperties> {
426        // The DATE_TRUNC function preserves the order of its second argument.
427        let precision = &input[0];
428        let date_value = &input[1];
429
430        if precision.sort_properties.eq(&SortProperties::Singleton) {
431            Ok(date_value.sort_properties)
432        } else {
433            Ok(SortProperties::Unordered)
434        }
435    }
436    fn documentation(&self) -> Option<&Documentation> {
437        self.doc()
438    }
439}
440
441const NANOS_PER_MICROSECOND: i64 = NANOSECONDS / MICROSECONDS;
442const NANOS_PER_MILLISECOND: i64 = NANOSECONDS / MILLISECONDS;
443const NANOS_PER_SECOND: i64 = NANOSECONDS;
444const NANOS_PER_MINUTE: i64 = 60 * NANOS_PER_SECOND;
445const NANOS_PER_HOUR: i64 = 60 * NANOS_PER_MINUTE;
446
447const MICROS_PER_MILLISECOND: i64 = MICROSECONDS / MILLISECONDS;
448const MICROS_PER_SECOND: i64 = MICROSECONDS;
449const MICROS_PER_MINUTE: i64 = 60 * MICROS_PER_SECOND;
450const MICROS_PER_HOUR: i64 = 60 * MICROS_PER_MINUTE;
451
452const MILLIS_PER_SECOND: i32 = MILLISECONDS as i32;
453const MILLIS_PER_MINUTE: i32 = 60 * MILLIS_PER_SECOND;
454const MILLIS_PER_HOUR: i32 = 60 * MILLIS_PER_MINUTE;
455
456const SECS_PER_MINUTE: i32 = 60;
457const SECS_PER_HOUR: i32 = 60 * SECS_PER_MINUTE;
458
459/// Truncate time in nanoseconds to the specified granularity
460fn truncate_time_nanos(value: i64, granularity: DateTruncGranularity) -> i64 {
461    match granularity {
462        DateTruncGranularity::Hour => value - (value % NANOS_PER_HOUR),
463        DateTruncGranularity::Minute => value - (value % NANOS_PER_MINUTE),
464        DateTruncGranularity::Second => value - (value % NANOS_PER_SECOND),
465        DateTruncGranularity::Millisecond => value - (value % NANOS_PER_MILLISECOND),
466        DateTruncGranularity::Microsecond => value - (value % NANOS_PER_MICROSECOND),
467        // Other granularities are not valid for time - should be caught earlier
468        _ => value,
469    }
470}
471
472/// Truncate time in microseconds to the specified granularity
473fn truncate_time_micros(value: i64, granularity: DateTruncGranularity) -> i64 {
474    match granularity {
475        DateTruncGranularity::Hour => value - (value % MICROS_PER_HOUR),
476        DateTruncGranularity::Minute => value - (value % MICROS_PER_MINUTE),
477        DateTruncGranularity::Second => value - (value % MICROS_PER_SECOND),
478        DateTruncGranularity::Millisecond => value - (value % MICROS_PER_MILLISECOND),
479        DateTruncGranularity::Microsecond => value, // Already at microsecond precision
480        // Other granularities are not valid for time
481        _ => value,
482    }
483}
484
485/// Truncate time in milliseconds to the specified granularity
486fn truncate_time_millis(value: i32, granularity: DateTruncGranularity) -> i32 {
487    match granularity {
488        DateTruncGranularity::Hour => value - (value % MILLIS_PER_HOUR),
489        DateTruncGranularity::Minute => value - (value % MILLIS_PER_MINUTE),
490        DateTruncGranularity::Second => value - (value % MILLIS_PER_SECOND),
491        DateTruncGranularity::Millisecond => value, // Already at millisecond precision
492        DateTruncGranularity::Microsecond => value, // Can't truncate to finer precision
493        // Other granularities are not valid for time
494        _ => value,
495    }
496}
497
498/// Truncate time in seconds to the specified granularity
499fn truncate_time_secs(value: i32, granularity: DateTruncGranularity) -> i32 {
500    match granularity {
501        DateTruncGranularity::Hour => value - (value % SECS_PER_HOUR),
502        DateTruncGranularity::Minute => value - (value % SECS_PER_MINUTE),
503        DateTruncGranularity::Second => value, // Already at second precision
504        DateTruncGranularity::Millisecond => value, // Can't truncate to finer precision
505        DateTruncGranularity::Microsecond => value, // Can't truncate to finer precision
506        // Other granularities are not valid for time
507        _ => value,
508    }
509}
510
511fn _date_trunc_coarse<T>(
512    granularity: DateTruncGranularity,
513    value: Option<T>,
514) -> Result<Option<T>>
515where
516    T: Datelike + Timelike + Sub<Duration, Output = T> + Copy,
517{
518    let value = match granularity {
519        DateTruncGranularity::Millisecond => value,
520        DateTruncGranularity::Microsecond => value,
521        DateTruncGranularity::Second => value.and_then(|d| d.with_nanosecond(0)),
522        DateTruncGranularity::Minute => value
523            .and_then(|d| d.with_nanosecond(0))
524            .and_then(|d| d.with_second(0)),
525        DateTruncGranularity::Hour => value
526            .and_then(|d| d.with_nanosecond(0))
527            .and_then(|d| d.with_second(0))
528            .and_then(|d| d.with_minute(0)),
529        DateTruncGranularity::Day => value
530            .and_then(|d| d.with_nanosecond(0))
531            .and_then(|d| d.with_second(0))
532            .and_then(|d| d.with_minute(0))
533            .and_then(|d| d.with_hour(0)),
534        DateTruncGranularity::Week => value
535            .and_then(|d| d.with_nanosecond(0))
536            .and_then(|d| d.with_second(0))
537            .and_then(|d| d.with_minute(0))
538            .and_then(|d| d.with_hour(0))
539            .map(|d| {
540                d - TimeDelta::try_seconds(60 * 60 * 24 * d.weekday() as i64).unwrap()
541            }),
542        DateTruncGranularity::Month => value
543            .and_then(|d| d.with_nanosecond(0))
544            .and_then(|d| d.with_second(0))
545            .and_then(|d| d.with_minute(0))
546            .and_then(|d| d.with_hour(0))
547            .and_then(|d| d.with_day0(0)),
548        DateTruncGranularity::Quarter => value
549            .and_then(|d| d.with_nanosecond(0))
550            .and_then(|d| d.with_second(0))
551            .and_then(|d| d.with_minute(0))
552            .and_then(|d| d.with_hour(0))
553            .and_then(|d| d.with_day0(0))
554            .and_then(|d| d.with_month(quarter_month(&d))),
555        DateTruncGranularity::Year => value
556            .and_then(|d| d.with_nanosecond(0))
557            .and_then(|d| d.with_second(0))
558            .and_then(|d| d.with_minute(0))
559            .and_then(|d| d.with_hour(0))
560            .and_then(|d| d.with_day0(0))
561            .and_then(|d| d.with_month0(0)),
562    };
563    Ok(value)
564}
565
566fn quarter_month<T>(date: &T) -> u32
567where
568    T: Datelike,
569{
570    1 + 3 * ((date.month() - 1) / 3)
571}
572
573fn _date_trunc_coarse_with_tz(
574    granularity: DateTruncGranularity,
575    value: Option<DateTime<Tz>>,
576) -> Result<Option<i64>> {
577    if let Some(value) = value {
578        let local = value.naive_local();
579        let truncated = _date_trunc_coarse::<NaiveDateTime>(granularity, Some(local))?;
580        let truncated = truncated.and_then(|truncated| {
581            match truncated.and_local_timezone(value.timezone()) {
582                LocalResult::None => {
583                    // This can happen if the date_trunc operation moves the time into
584                    // an hour that doesn't exist due to daylight savings. On known example where
585                    // this can happen is with historic dates in the America/Sao_Paulo time zone.
586                    // To account for this adjust the time by a few hours, convert to local time,
587                    // and then adjust the time back.
588                    truncated
589                        .sub(TimeDelta::try_hours(3).unwrap())
590                        .and_local_timezone(value.timezone())
591                        .single()
592                        .map(|v| v.add(TimeDelta::try_hours(3).unwrap()))
593                }
594                LocalResult::Single(datetime) => Some(datetime),
595                LocalResult::Ambiguous(datetime1, datetime2) => {
596                    // Because we are truncating from an equally or more specific time
597                    // the original time must have been within the ambiguous local time
598                    // period. Therefore the offset of one of these times should match the
599                    // offset of the original time.
600                    if datetime1.offset().fix() == value.offset().fix() {
601                        Some(datetime1)
602                    } else {
603                        Some(datetime2)
604                    }
605                }
606            }
607        });
608        Ok(truncated.and_then(|value| value.timestamp_nanos_opt()))
609    } else {
610        _date_trunc_coarse::<NaiveDateTime>(granularity, None)?;
611        Ok(None)
612    }
613}
614
615fn _date_trunc_coarse_without_tz(
616    granularity: DateTruncGranularity,
617    value: Option<NaiveDateTime>,
618) -> Result<Option<i64>> {
619    let value = _date_trunc_coarse::<NaiveDateTime>(granularity, value)?;
620    Ok(value.and_then(|value| value.and_utc().timestamp_nanos_opt()))
621}
622
623/// Truncates the single `value`, expressed in nanoseconds since the
624/// epoch, for granularities greater than 1 second, in taking into
625/// account that some granularities are not uniform durations of time
626/// (e.g. months are not always the same lengths, leap seconds, etc)
627fn date_trunc_coarse(
628    granularity: DateTruncGranularity,
629    value: i64,
630    tz: Option<Tz>,
631) -> Result<i64> {
632    let value = match tz {
633        Some(tz) => {
634            // Use chrono DateTime<Tz> to clear the various fields because need to clear per timezone,
635            // and NaiveDateTime (ISO 8601) has no concept of timezones
636            let value = as_datetime_with_timezone::<TimestampNanosecondType>(value, tz)
637                .ok_or(exec_datafusion_err!("Timestamp {value} out of range"))?;
638            _date_trunc_coarse_with_tz(granularity, Some(value))
639        }
640        None => {
641            // Use chrono NaiveDateTime to clear the various fields, if we don't have a timezone.
642            let value = timestamp_ns_to_datetime(value)
643                .ok_or_else(|| exec_datafusion_err!("Timestamp {value} out of range"))?;
644            _date_trunc_coarse_without_tz(granularity, Some(value))
645        }
646    }?;
647
648    // `with_x(0)` are infallible because `0` are always a valid
649    Ok(value.unwrap())
650}
651
652/// Fast path for fine granularities (hour and smaller) that can be handled
653/// with simple arithmetic operations without calendar complexity.
654///
655/// This function is timezone-agnostic and should only be used when:
656/// - No timezone is specified in the input, OR
657/// - The granularity is less than hour as hour can be affected by DST transitions in some cases
658fn general_date_trunc_array_fine_granularity<T: ArrowTimestampType>(
659    tu: TimeUnit,
660    array: &PrimitiveArray<T>,
661    granularity: DateTruncGranularity,
662    tz_opt: Option<Arc<str>>,
663) -> Result<ArrayRef> {
664    let unit = match (tu, granularity) {
665        (Second, DateTruncGranularity::Minute) => NonZeroI64::new(60),
666        (Second, DateTruncGranularity::Hour) => NonZeroI64::new(3600),
667        (Second, DateTruncGranularity::Day) => NonZeroI64::new(86400),
668
669        (Millisecond, DateTruncGranularity::Second) => NonZeroI64::new(1_000),
670        (Millisecond, DateTruncGranularity::Minute) => NonZeroI64::new(60_000),
671        (Millisecond, DateTruncGranularity::Hour) => NonZeroI64::new(3_600_000),
672        (Millisecond, DateTruncGranularity::Day) => NonZeroI64::new(86_400_000),
673
674        (Microsecond, DateTruncGranularity::Millisecond) => NonZeroI64::new(1_000),
675        (Microsecond, DateTruncGranularity::Second) => NonZeroI64::new(1_000_000),
676        (Microsecond, DateTruncGranularity::Minute) => NonZeroI64::new(60_000_000),
677        (Microsecond, DateTruncGranularity::Hour) => NonZeroI64::new(3_600_000_000),
678        (Microsecond, DateTruncGranularity::Day) => NonZeroI64::new(86_400_000_000),
679
680        (Nanosecond, DateTruncGranularity::Microsecond) => NonZeroI64::new(1_000),
681        (Nanosecond, DateTruncGranularity::Millisecond) => NonZeroI64::new(1_000_000),
682        (Nanosecond, DateTruncGranularity::Second) => NonZeroI64::new(1_000_000_000),
683        (Nanosecond, DateTruncGranularity::Minute) => NonZeroI64::new(60_000_000_000),
684        (Nanosecond, DateTruncGranularity::Hour) => NonZeroI64::new(3_600_000_000_000),
685        (Nanosecond, DateTruncGranularity::Day) => NonZeroI64::new(86_400_000_000_000),
686        _ => None,
687    };
688
689    if let Some(unit) = unit {
690        let unit = unit.get();
691        let array = PrimitiveArray::<T>::from_iter_values_with_nulls(
692            array
693                .values()
694                .iter()
695                .map(|v| *v - i64::rem_euclid(*v, unit)),
696            array.nulls().cloned(),
697        )
698        .with_timezone_opt(tz_opt);
699        Ok(Arc::new(array))
700    } else {
701        // truncate to the same or smaller unit
702        Ok(Arc::new(array.clone()))
703    }
704}
705
706// truncates a single value with the given timeunit to the specified granularity
707fn general_date_trunc(
708    tu: TimeUnit,
709    value: i64,
710    tz: Option<Tz>,
711    granularity: DateTruncGranularity,
712) -> Result<i64, DataFusionError> {
713    let scale = match tu {
714        Second => 1_000_000_000,
715        Millisecond => 1_000_000,
716        Microsecond => 1_000,
717        Nanosecond => 1,
718    };
719
720    // convert to nanoseconds
721    let nano = date_trunc_coarse(
722        granularity,
723        value
724            .checked_mul(scale)
725            .ok_or_else(|| exec_datafusion_err!("Timestamp {value} out of range"))?,
726        tz,
727    )?;
728
729    let result = match tu {
730        Second => match granularity {
731            DateTruncGranularity::Minute => nano / 1_000_000_000 / 60 * 60,
732            _ => nano / 1_000_000_000,
733        },
734        Millisecond => match granularity {
735            DateTruncGranularity::Minute => nano / 1_000_000 / 1_000 / 60 * 1_000 * 60,
736            DateTruncGranularity::Second => nano / 1_000_000 / 1_000 * 1_000,
737            _ => nano / 1_000_000,
738        },
739        Microsecond => match granularity {
740            DateTruncGranularity::Minute => {
741                nano / 1_000 / 1_000_000 / 60 * 60 * 1_000_000
742            }
743            DateTruncGranularity::Second => nano / 1_000 / 1_000_000 * 1_000_000,
744            DateTruncGranularity::Millisecond => nano / 1_000 / 1_000 * 1_000,
745            _ => nano / 1_000,
746        },
747        _ => match granularity {
748            DateTruncGranularity::Minute => {
749                nano / 1_000_000_000 / 60 * 1_000_000_000 * 60
750            }
751            DateTruncGranularity::Second => nano / 1_000_000_000 * 1_000_000_000,
752            DateTruncGranularity::Millisecond => nano / 1_000_000 * 1_000_000,
753            DateTruncGranularity::Microsecond => nano / 1_000 * 1_000,
754            _ => nano,
755        },
756    };
757    Ok(result)
758}
759
760fn parse_tz(tz: &Option<Arc<str>>) -> Result<Option<Tz>> {
761    tz.as_ref()
762        .map(|tz| {
763            Tz::from_str(tz)
764                .map_err(|op| exec_datafusion_err!("failed on timezone {tz}: {op:?}"))
765        })
766        .transpose()
767}
768
769#[cfg(test)]
770mod tests {
771    use std::sync::Arc;
772
773    use crate::datetime::date_trunc::{
774        DateTruncFunc, DateTruncGranularity, date_trunc_coarse,
775    };
776
777    use arrow::array::cast::as_primitive_array;
778    use arrow::array::types::TimestampNanosecondType;
779    use arrow::array::{Array, TimestampNanosecondArray};
780    use arrow::compute::kernels::cast_utils::string_to_timestamp_nanos;
781    use arrow::datatypes::{DataType, Field, TimeUnit};
782    use datafusion_common::ScalarValue;
783    use datafusion_common::config::ConfigOptions;
784    use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl};
785
786    #[test]
787    fn date_trunc_test() {
788        let cases = vec![
789            (
790                "2020-09-08T13:42:29.190855Z",
791                "second",
792                "2020-09-08T13:42:29.000000Z",
793            ),
794            (
795                "2020-09-08T13:42:29.190855Z",
796                "minute",
797                "2020-09-08T13:42:00.000000Z",
798            ),
799            (
800                "2020-09-08T13:42:29.190855Z",
801                "hour",
802                "2020-09-08T13:00:00.000000Z",
803            ),
804            (
805                "2020-09-08T13:42:29.190855Z",
806                "day",
807                "2020-09-08T00:00:00.000000Z",
808            ),
809            (
810                "2020-09-08T13:42:29.190855Z",
811                "week",
812                "2020-09-07T00:00:00.000000Z",
813            ),
814            (
815                "2020-09-08T13:42:29.190855Z",
816                "month",
817                "2020-09-01T00:00:00.000000Z",
818            ),
819            (
820                "2020-09-08T13:42:29.190855Z",
821                "year",
822                "2020-01-01T00:00:00.000000Z",
823            ),
824            // week
825            (
826                "2021-01-01T13:42:29.190855Z",
827                "week",
828                "2020-12-28T00:00:00.000000Z",
829            ),
830            (
831                "2020-01-01T13:42:29.190855Z",
832                "week",
833                "2019-12-30T00:00:00.000000Z",
834            ),
835            // quarter
836            (
837                "2020-01-01T13:42:29.190855Z",
838                "quarter",
839                "2020-01-01T00:00:00.000000Z",
840            ),
841            (
842                "2020-02-01T13:42:29.190855Z",
843                "quarter",
844                "2020-01-01T00:00:00.000000Z",
845            ),
846            (
847                "2020-03-01T13:42:29.190855Z",
848                "quarter",
849                "2020-01-01T00:00:00.000000Z",
850            ),
851            (
852                "2020-04-01T13:42:29.190855Z",
853                "quarter",
854                "2020-04-01T00:00:00.000000Z",
855            ),
856            (
857                "2020-08-01T13:42:29.190855Z",
858                "quarter",
859                "2020-07-01T00:00:00.000000Z",
860            ),
861            (
862                "2020-11-01T13:42:29.190855Z",
863                "quarter",
864                "2020-10-01T00:00:00.000000Z",
865            ),
866            (
867                "2020-12-01T13:42:29.190855Z",
868                "quarter",
869                "2020-10-01T00:00:00.000000Z",
870            ),
871        ];
872
873        cases.iter().for_each(|(original, granularity, expected)| {
874            let left = string_to_timestamp_nanos(original).unwrap();
875            let right = string_to_timestamp_nanos(expected).unwrap();
876            let granularity_enum = DateTruncGranularity::from_str(granularity).unwrap();
877            let result = date_trunc_coarse(granularity_enum, left, None).unwrap();
878            assert_eq!(result, right, "{original} = {expected}");
879        });
880    }
881
882    #[test]
883    fn test_date_trunc_timezones() {
884        let cases = [
885            (
886                vec![
887                    "2020-09-08T00:00:00Z",
888                    "2020-09-08T01:00:00Z",
889                    "2020-09-08T02:00:00Z",
890                    "2020-09-08T03:00:00Z",
891                    "2020-09-08T04:00:00Z",
892                ],
893                Some("+00".into()),
894                vec![
895                    "2020-09-08T00:00:00Z",
896                    "2020-09-08T00:00:00Z",
897                    "2020-09-08T00:00:00Z",
898                    "2020-09-08T00:00:00Z",
899                    "2020-09-08T00:00:00Z",
900                ],
901            ),
902            (
903                vec![
904                    "2020-09-08T00:00:00Z",
905                    "2020-09-08T01:00:00Z",
906                    "2020-09-08T02:00:00Z",
907                    "2020-09-08T03:00:00Z",
908                    "2020-09-08T04:00:00Z",
909                ],
910                None,
911                vec![
912                    "2020-09-08T00:00:00Z",
913                    "2020-09-08T00:00:00Z",
914                    "2020-09-08T00:00:00Z",
915                    "2020-09-08T00:00:00Z",
916                    "2020-09-08T00:00:00Z",
917                ],
918            ),
919            (
920                vec![
921                    "2020-09-08T00:00:00Z",
922                    "2020-09-08T01:00:00Z",
923                    "2020-09-08T02:00:00Z",
924                    "2020-09-08T03:00:00Z",
925                    "2020-09-08T04:00:00Z",
926                ],
927                Some("-02".into()),
928                vec![
929                    "2020-09-07T02:00:00Z",
930                    "2020-09-07T02:00:00Z",
931                    "2020-09-08T02:00:00Z",
932                    "2020-09-08T02:00:00Z",
933                    "2020-09-08T02:00:00Z",
934                ],
935            ),
936            (
937                vec![
938                    "2020-09-08T00:00:00+05",
939                    "2020-09-08T01:00:00+05",
940                    "2020-09-08T02:00:00+05",
941                    "2020-09-08T03:00:00+05",
942                    "2020-09-08T04:00:00+05",
943                ],
944                Some("+05".into()),
945                vec![
946                    "2020-09-08T00:00:00+05",
947                    "2020-09-08T00:00:00+05",
948                    "2020-09-08T00:00:00+05",
949                    "2020-09-08T00:00:00+05",
950                    "2020-09-08T00:00:00+05",
951                ],
952            ),
953            (
954                vec![
955                    "2020-09-08T00:00:00+08",
956                    "2020-09-08T01:00:00+08",
957                    "2020-09-08T02:00:00+08",
958                    "2020-09-08T03:00:00+08",
959                    "2020-09-08T04:00:00+08",
960                ],
961                Some("+08".into()),
962                vec![
963                    "2020-09-08T00:00:00+08",
964                    "2020-09-08T00:00:00+08",
965                    "2020-09-08T00:00:00+08",
966                    "2020-09-08T00:00:00+08",
967                    "2020-09-08T00:00:00+08",
968                ],
969            ),
970            (
971                vec![
972                    "2024-10-26T23:00:00Z",
973                    "2024-10-27T00:00:00Z",
974                    "2024-10-27T01:00:00Z",
975                    "2024-10-27T02:00:00Z",
976                ],
977                Some("Europe/Berlin".into()),
978                vec![
979                    "2024-10-27T00:00:00+02",
980                    "2024-10-27T00:00:00+02",
981                    "2024-10-27T00:00:00+02",
982                    "2024-10-27T00:00:00+02",
983                ],
984            ),
985            (
986                vec![
987                    "2018-02-18T00:00:00Z",
988                    "2018-02-18T01:00:00Z",
989                    "2018-02-18T02:00:00Z",
990                    "2018-02-18T03:00:00Z",
991                    "2018-11-04T01:00:00Z",
992                    "2018-11-04T02:00:00Z",
993                    "2018-11-04T03:00:00Z",
994                    "2018-11-04T04:00:00Z",
995                ],
996                Some("America/Sao_Paulo".into()),
997                vec![
998                    "2018-02-17T00:00:00-02",
999                    "2018-02-17T00:00:00-02",
1000                    "2018-02-17T00:00:00-02",
1001                    "2018-02-18T00:00:00-03",
1002                    "2018-11-03T00:00:00-03",
1003                    "2018-11-03T00:00:00-03",
1004                    "2018-11-04T01:00:00-02",
1005                    "2018-11-04T01:00:00-02",
1006                ],
1007            ),
1008        ];
1009
1010        cases.iter().for_each(|(original, tz_opt, expected)| {
1011            let input = original
1012                .iter()
1013                .map(|s| Some(string_to_timestamp_nanos(s).unwrap()))
1014                .collect::<TimestampNanosecondArray>()
1015                .with_timezone_opt(tz_opt.clone());
1016            let right = expected
1017                .iter()
1018                .map(|s| Some(string_to_timestamp_nanos(s).unwrap()))
1019                .collect::<TimestampNanosecondArray>()
1020                .with_timezone_opt(tz_opt.clone());
1021            let batch_len = input.len();
1022            let arg_fields = vec![
1023                Field::new("a", DataType::Utf8, false).into(),
1024                Field::new("b", input.data_type().clone(), false).into(),
1025            ];
1026            let args = ScalarFunctionArgs {
1027                args: vec![
1028                    ColumnarValue::Scalar(ScalarValue::from("day")),
1029                    ColumnarValue::Array(Arc::new(input)),
1030                ],
1031                arg_fields,
1032                number_rows: batch_len,
1033                return_field: Field::new(
1034                    "f",
1035                    DataType::Timestamp(TimeUnit::Nanosecond, tz_opt.clone()),
1036                    true,
1037                )
1038                .into(),
1039                config_options: Arc::new(ConfigOptions::default()),
1040            };
1041            let result = DateTruncFunc::new().invoke_with_args(args).unwrap();
1042            if let ColumnarValue::Array(result) = result {
1043                assert_eq!(
1044                    result.data_type(),
1045                    &DataType::Timestamp(TimeUnit::Nanosecond, tz_opt.clone())
1046                );
1047                let left = as_primitive_array::<TimestampNanosecondType>(&result);
1048                assert_eq!(left, &right);
1049            } else {
1050                panic!("unexpected column type");
1051            }
1052        });
1053    }
1054
1055    #[test]
1056    fn test_date_trunc_hour_timezones() {
1057        let cases = [
1058            (
1059                vec![
1060                    "2020-09-08T00:30:00Z",
1061                    "2020-09-08T01:30:00Z",
1062                    "2020-09-08T02:30:00Z",
1063                    "2020-09-08T03:30:00Z",
1064                    "2020-09-08T04:30:00Z",
1065                ],
1066                Some("+00".into()),
1067                vec![
1068                    "2020-09-08T00:00:00Z",
1069                    "2020-09-08T01:00:00Z",
1070                    "2020-09-08T02:00:00Z",
1071                    "2020-09-08T03:00:00Z",
1072                    "2020-09-08T04:00:00Z",
1073                ],
1074            ),
1075            (
1076                vec![
1077                    "2020-09-08T00:30:00Z",
1078                    "2020-09-08T01:30:00Z",
1079                    "2020-09-08T02:30:00Z",
1080                    "2020-09-08T03:30:00Z",
1081                    "2020-09-08T04:30:00Z",
1082                ],
1083                None,
1084                vec![
1085                    "2020-09-08T00:00:00Z",
1086                    "2020-09-08T01:00:00Z",
1087                    "2020-09-08T02:00:00Z",
1088                    "2020-09-08T03:00:00Z",
1089                    "2020-09-08T04:00:00Z",
1090                ],
1091            ),
1092            (
1093                vec![
1094                    "2020-09-08T00:30:00Z",
1095                    "2020-09-08T01:30:00Z",
1096                    "2020-09-08T02:30:00Z",
1097                    "2020-09-08T03:30:00Z",
1098                    "2020-09-08T04:30:00Z",
1099                ],
1100                Some("-02".into()),
1101                vec![
1102                    "2020-09-08T00:00:00Z",
1103                    "2020-09-08T01:00:00Z",
1104                    "2020-09-08T02:00:00Z",
1105                    "2020-09-08T03:00:00Z",
1106                    "2020-09-08T04:00:00Z",
1107                ],
1108            ),
1109            (
1110                vec![
1111                    "2020-09-08T00:30:00+05",
1112                    "2020-09-08T01:30:00+05",
1113                    "2020-09-08T02:30:00+05",
1114                    "2020-09-08T03:30:00+05",
1115                    "2020-09-08T04:30:00+05",
1116                ],
1117                Some("+05".into()),
1118                vec![
1119                    "2020-09-08T00:00:00+05",
1120                    "2020-09-08T01:00:00+05",
1121                    "2020-09-08T02:00:00+05",
1122                    "2020-09-08T03:00:00+05",
1123                    "2020-09-08T04:00:00+05",
1124                ],
1125            ),
1126            (
1127                vec![
1128                    "2020-09-08T00:30:00+08",
1129                    "2020-09-08T01:30:00+08",
1130                    "2020-09-08T02:30:00+08",
1131                    "2020-09-08T03:30:00+08",
1132                    "2020-09-08T04:30:00+08",
1133                ],
1134                Some("+08".into()),
1135                vec![
1136                    "2020-09-08T00:00:00+08",
1137                    "2020-09-08T01:00:00+08",
1138                    "2020-09-08T02:00:00+08",
1139                    "2020-09-08T03:00:00+08",
1140                    "2020-09-08T04:00:00+08",
1141                ],
1142            ),
1143            (
1144                vec![
1145                    "2024-10-26T23:30:00Z",
1146                    "2024-10-27T00:30:00Z",
1147                    "2024-10-27T01:30:00Z",
1148                    "2024-10-27T02:30:00Z",
1149                ],
1150                Some("Europe/Berlin".into()),
1151                vec![
1152                    "2024-10-27T01:00:00+02",
1153                    "2024-10-27T02:00:00+02",
1154                    "2024-10-27T02:00:00+01",
1155                    "2024-10-27T03:00:00+01",
1156                ],
1157            ),
1158            (
1159                vec![
1160                    "2018-02-18T00:30:00Z",
1161                    "2018-02-18T01:30:00Z",
1162                    "2018-02-18T02:30:00Z",
1163                    "2018-02-18T03:30:00Z",
1164                    "2018-11-04T01:00:00Z",
1165                    "2018-11-04T02:00:00Z",
1166                    "2018-11-04T03:00:00Z",
1167                    "2018-11-04T04:00:00Z",
1168                ],
1169                Some("America/Sao_Paulo".into()),
1170                vec![
1171                    "2018-02-17T22:00:00-02",
1172                    "2018-02-17T23:00:00-02",
1173                    "2018-02-17T23:00:00-03",
1174                    "2018-02-18T00:00:00-03",
1175                    "2018-11-03T22:00:00-03",
1176                    "2018-11-03T23:00:00-03",
1177                    "2018-11-04T01:00:00-02",
1178                    "2018-11-04T02:00:00-02",
1179                ],
1180            ),
1181            (
1182                vec![
1183                    "2024-10-26T23:30:00Z",
1184                    "2024-10-27T00:30:00Z",
1185                    "2024-10-27T01:30:00Z",
1186                    "2024-10-27T02:30:00Z",
1187                ],
1188                Some("Asia/Kathmandu".into()), // UTC+5:45
1189                vec![
1190                    "2024-10-27T05:00:00+05:45",
1191                    "2024-10-27T06:00:00+05:45",
1192                    "2024-10-27T07:00:00+05:45",
1193                    "2024-10-27T08:00:00+05:45",
1194                ],
1195            ),
1196        ];
1197
1198        cases.iter().for_each(|(original, tz_opt, expected)| {
1199            let input = original
1200                .iter()
1201                .map(|s| Some(string_to_timestamp_nanos(s).unwrap()))
1202                .collect::<TimestampNanosecondArray>()
1203                .with_timezone_opt(tz_opt.clone());
1204            let right = expected
1205                .iter()
1206                .map(|s| Some(string_to_timestamp_nanos(s).unwrap()))
1207                .collect::<TimestampNanosecondArray>()
1208                .with_timezone_opt(tz_opt.clone());
1209            let batch_len = input.len();
1210            let arg_fields = vec![
1211                Field::new("a", DataType::Utf8, false).into(),
1212                Field::new("b", input.data_type().clone(), false).into(),
1213            ];
1214            let args = ScalarFunctionArgs {
1215                args: vec![
1216                    ColumnarValue::Scalar(ScalarValue::from("hour")),
1217                    ColumnarValue::Array(Arc::new(input)),
1218                ],
1219                arg_fields,
1220                number_rows: batch_len,
1221                return_field: Field::new(
1222                    "f",
1223                    DataType::Timestamp(TimeUnit::Nanosecond, tz_opt.clone()),
1224                    true,
1225                )
1226                .into(),
1227                config_options: Arc::new(ConfigOptions::default()),
1228            };
1229            let result = DateTruncFunc::new().invoke_with_args(args).unwrap();
1230            if let ColumnarValue::Array(result) = result {
1231                assert_eq!(
1232                    result.data_type(),
1233                    &DataType::Timestamp(TimeUnit::Nanosecond, tz_opt.clone())
1234                );
1235                let left = as_primitive_array::<TimestampNanosecondType>(&result);
1236                assert_eq!(left, &right);
1237            } else {
1238                panic!("unexpected column type");
1239            }
1240        });
1241    }
1242
1243    #[test]
1244    fn test_date_trunc_fine_granularity_timezones() {
1245        let cases = [
1246            // Test "second" granularity
1247            (
1248                vec![
1249                    "2020-09-08T13:42:29.190855Z",
1250                    "2020-09-08T13:42:30.500000Z",
1251                    "2020-09-08T13:42:31.999999Z",
1252                ],
1253                Some("+00".into()),
1254                "second",
1255                vec![
1256                    "2020-09-08T13:42:29.000000Z",
1257                    "2020-09-08T13:42:30.000000Z",
1258                    "2020-09-08T13:42:31.000000Z",
1259                ],
1260            ),
1261            (
1262                vec![
1263                    "2020-09-08T13:42:29.190855+05",
1264                    "2020-09-08T13:42:30.500000+05",
1265                    "2020-09-08T13:42:31.999999+05",
1266                ],
1267                Some("+05".into()),
1268                "second",
1269                vec![
1270                    "2020-09-08T13:42:29.000000+05",
1271                    "2020-09-08T13:42:30.000000+05",
1272                    "2020-09-08T13:42:31.000000+05",
1273                ],
1274            ),
1275            (
1276                vec![
1277                    "2020-09-08T13:42:29.190855Z",
1278                    "2020-09-08T13:42:30.500000Z",
1279                    "2020-09-08T13:42:31.999999Z",
1280                ],
1281                Some("Europe/Berlin".into()),
1282                "second",
1283                vec![
1284                    "2020-09-08T13:42:29.000000Z",
1285                    "2020-09-08T13:42:30.000000Z",
1286                    "2020-09-08T13:42:31.000000Z",
1287                ],
1288            ),
1289            // Test "minute" granularity
1290            (
1291                vec![
1292                    "2020-09-08T13:42:29.190855Z",
1293                    "2020-09-08T13:43:30.500000Z",
1294                    "2020-09-08T13:44:31.999999Z",
1295                ],
1296                Some("+00".into()),
1297                "minute",
1298                vec![
1299                    "2020-09-08T13:42:00.000000Z",
1300                    "2020-09-08T13:43:00.000000Z",
1301                    "2020-09-08T13:44:00.000000Z",
1302                ],
1303            ),
1304            (
1305                vec![
1306                    "2020-09-08T13:42:29.190855+08",
1307                    "2020-09-08T13:43:30.500000+08",
1308                    "2020-09-08T13:44:31.999999+08",
1309                ],
1310                Some("+08".into()),
1311                "minute",
1312                vec![
1313                    "2020-09-08T13:42:00.000000+08",
1314                    "2020-09-08T13:43:00.000000+08",
1315                    "2020-09-08T13:44:00.000000+08",
1316                ],
1317            ),
1318            (
1319                vec![
1320                    "2020-09-08T13:42:29.190855Z",
1321                    "2020-09-08T13:43:30.500000Z",
1322                    "2020-09-08T13:44:31.999999Z",
1323                ],
1324                Some("America/Sao_Paulo".into()),
1325                "minute",
1326                vec![
1327                    "2020-09-08T13:42:00.000000Z",
1328                    "2020-09-08T13:43:00.000000Z",
1329                    "2020-09-08T13:44:00.000000Z",
1330                ],
1331            ),
1332            // Test with None (no timezone)
1333            (
1334                vec![
1335                    "2020-09-08T13:42:29.190855Z",
1336                    "2020-09-08T13:43:30.500000Z",
1337                    "2020-09-08T13:44:31.999999Z",
1338                ],
1339                None,
1340                "minute",
1341                vec![
1342                    "2020-09-08T13:42:00.000000Z",
1343                    "2020-09-08T13:43:00.000000Z",
1344                    "2020-09-08T13:44:00.000000Z",
1345                ],
1346            ),
1347            // Test millisecond granularity
1348            (
1349                vec![
1350                    "2020-09-08T13:42:29.190855Z",
1351                    "2020-09-08T13:42:29.191999Z",
1352                    "2020-09-08T13:42:29.192500Z",
1353                ],
1354                Some("Asia/Kolkata".into()),
1355                "millisecond",
1356                vec![
1357                    "2020-09-08T19:12:29.190000+05:30",
1358                    "2020-09-08T19:12:29.191000+05:30",
1359                    "2020-09-08T19:12:29.192000+05:30",
1360                ],
1361            ),
1362        ];
1363
1364        cases
1365            .iter()
1366            .for_each(|(original, tz_opt, granularity, expected)| {
1367                let input = original
1368                    .iter()
1369                    .map(|s| Some(string_to_timestamp_nanos(s).unwrap()))
1370                    .collect::<TimestampNanosecondArray>()
1371                    .with_timezone_opt(tz_opt.clone());
1372                let right = expected
1373                    .iter()
1374                    .map(|s| Some(string_to_timestamp_nanos(s).unwrap()))
1375                    .collect::<TimestampNanosecondArray>()
1376                    .with_timezone_opt(tz_opt.clone());
1377                let batch_len = input.len();
1378                let arg_fields = vec![
1379                    Field::new("a", DataType::Utf8, false).into(),
1380                    Field::new("b", input.data_type().clone(), false).into(),
1381                ];
1382                let args = ScalarFunctionArgs {
1383                    args: vec![
1384                        ColumnarValue::Scalar(ScalarValue::from(*granularity)),
1385                        ColumnarValue::Array(Arc::new(input)),
1386                    ],
1387                    arg_fields,
1388                    number_rows: batch_len,
1389                    return_field: Field::new(
1390                        "f",
1391                        DataType::Timestamp(TimeUnit::Nanosecond, tz_opt.clone()),
1392                        true,
1393                    )
1394                    .into(),
1395                    config_options: Arc::new(ConfigOptions::default()),
1396                };
1397                let result = DateTruncFunc::new().invoke_with_args(args).unwrap();
1398                if let ColumnarValue::Array(result) = result {
1399                    assert_eq!(
1400                        result.data_type(),
1401                        &DataType::Timestamp(TimeUnit::Nanosecond, tz_opt.clone()),
1402                        "Failed for granularity: {granularity}, timezone: {tz_opt:?}"
1403                    );
1404                    let left = as_primitive_array::<TimestampNanosecondType>(&result);
1405                    assert_eq!(
1406                        left, &right,
1407                        "Failed for granularity: {granularity}, timezone: {tz_opt:?}"
1408                    );
1409                } else {
1410                    panic!("unexpected column type");
1411                }
1412            });
1413    }
1414}