datafusion_functions/datetime/
date_trunc.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use std::any::Any;
19use std::num::NonZeroI64;
20use std::ops::{Add, Sub};
21use std::str::FromStr;
22use std::sync::Arc;
23
24use arrow::array::temporal_conversions::{
25    as_datetime_with_timezone, timestamp_ns_to_datetime,
26};
27use arrow::array::timezone::Tz;
28use arrow::array::types::{
29    ArrowTimestampType, TimestampMicrosecondType, TimestampMillisecondType,
30    TimestampNanosecondType, TimestampSecondType,
31};
32use arrow::array::{Array, ArrayRef, PrimitiveArray};
33use arrow::datatypes::DataType::{self, Null, Timestamp, Utf8, Utf8View};
34use arrow::datatypes::TimeUnit::{self, Microsecond, Millisecond, Nanosecond, Second};
35use datafusion_common::cast::as_primitive_array;
36use datafusion_common::{
37    DataFusionError, Result, ScalarValue, exec_datafusion_err, exec_err, plan_err,
38};
39use datafusion_expr::TypeSignature::Exact;
40use datafusion_expr::sort_properties::{ExprProperties, SortProperties};
41use datafusion_expr::{
42    ColumnarValue, Documentation, ScalarUDFImpl, Signature, TIMEZONE_WILDCARD, Volatility,
43};
44use datafusion_macros::user_doc;
45
46use chrono::{
47    DateTime, Datelike, Duration, LocalResult, NaiveDateTime, Offset, TimeDelta, Timelike,
48};
49
50/// Represents the granularity for date truncation operations
51#[derive(Debug, Clone, Copy, PartialEq, Eq)]
52enum DateTruncGranularity {
53    Microsecond,
54    Millisecond,
55    Second,
56    Minute,
57    Hour,
58    Day,
59    Week,
60    Month,
61    Quarter,
62    Year,
63}
64
65impl DateTruncGranularity {
66    /// List of all supported granularity values
67    /// Cannot use HashMap here as it would require lazy_static or once_cell,
68    /// Rust does not support const HashMap yet.
69    const SUPPORTED_GRANULARITIES: &[&str] = &[
70        "microsecond",
71        "millisecond",
72        "second",
73        "minute",
74        "hour",
75        "day",
76        "week",
77        "month",
78        "quarter",
79        "year",
80    ];
81
82    /// Parse a granularity string into a DateTruncGranularity enum
83    fn from_str(s: &str) -> Result<Self> {
84        // Using match for O(1) lookup - compiler optimizes this into a jump table or perfect hash
85        match s.to_lowercase().as_str() {
86            "microsecond" => Ok(Self::Microsecond),
87            "millisecond" => Ok(Self::Millisecond),
88            "second" => Ok(Self::Second),
89            "minute" => Ok(Self::Minute),
90            "hour" => Ok(Self::Hour),
91            "day" => Ok(Self::Day),
92            "week" => Ok(Self::Week),
93            "month" => Ok(Self::Month),
94            "quarter" => Ok(Self::Quarter),
95            "year" => Ok(Self::Year),
96            _ => {
97                let supported = Self::SUPPORTED_GRANULARITIES.join(", ");
98                exec_err!(
99                    "Unsupported date_trunc granularity: '{s}'. Supported values are: {supported}"
100                )
101            }
102        }
103    }
104
105    /// Returns true if this granularity can be handled with simple arithmetic
106    /// (fine granularity: second, minute, millisecond, microsecond)
107    fn is_fine_granularity(&self) -> bool {
108        matches!(
109            self,
110            Self::Second | Self::Minute | Self::Millisecond | Self::Microsecond
111        )
112    }
113
114    /// Returns true if this granularity can be handled with simple arithmetic in UTC
115    /// (hour and day in addition to fine granularities)
116    fn is_fine_granularity_utc(&self) -> bool {
117        self.is_fine_granularity() || matches!(self, Self::Hour | Self::Day)
118    }
119}
120
121#[user_doc(
122    doc_section(label = "Time and Date Functions"),
123    description = "Truncates a timestamp value to a specified precision.",
124    syntax_example = "date_trunc(precision, expression)",
125    argument(
126        name = "precision",
127        description = r#"Time precision to truncate to. The following precisions are supported:
128
129    - year / YEAR
130    - quarter / QUARTER
131    - month / MONTH
132    - week / WEEK
133    - day / DAY
134    - hour / HOUR
135    - minute / MINUTE
136    - second / SECOND
137    - millisecond / MILLISECOND
138    - microsecond / MICROSECOND
139"#
140    ),
141    argument(
142        name = "expression",
143        description = "Time expression to operate on. Can be a constant, column, or function."
144    )
145)]
146#[derive(Debug, PartialEq, Eq, Hash)]
147pub struct DateTruncFunc {
148    signature: Signature,
149    aliases: Vec<String>,
150}
151
152impl Default for DateTruncFunc {
153    fn default() -> Self {
154        Self::new()
155    }
156}
157
158impl DateTruncFunc {
159    pub fn new() -> Self {
160        Self {
161            signature: Signature::one_of(
162                vec![
163                    Exact(vec![Utf8, Timestamp(Nanosecond, None)]),
164                    Exact(vec![Utf8View, Timestamp(Nanosecond, None)]),
165                    Exact(vec![
166                        Utf8,
167                        Timestamp(Nanosecond, Some(TIMEZONE_WILDCARD.into())),
168                    ]),
169                    Exact(vec![
170                        Utf8View,
171                        Timestamp(Nanosecond, Some(TIMEZONE_WILDCARD.into())),
172                    ]),
173                    Exact(vec![Utf8, Timestamp(Microsecond, None)]),
174                    Exact(vec![Utf8View, Timestamp(Microsecond, None)]),
175                    Exact(vec![
176                        Utf8,
177                        Timestamp(Microsecond, Some(TIMEZONE_WILDCARD.into())),
178                    ]),
179                    Exact(vec![
180                        Utf8View,
181                        Timestamp(Microsecond, Some(TIMEZONE_WILDCARD.into())),
182                    ]),
183                    Exact(vec![Utf8, Timestamp(Millisecond, None)]),
184                    Exact(vec![Utf8View, Timestamp(Millisecond, None)]),
185                    Exact(vec![
186                        Utf8,
187                        Timestamp(Millisecond, Some(TIMEZONE_WILDCARD.into())),
188                    ]),
189                    Exact(vec![
190                        Utf8View,
191                        Timestamp(Millisecond, Some(TIMEZONE_WILDCARD.into())),
192                    ]),
193                    Exact(vec![Utf8, Timestamp(Second, None)]),
194                    Exact(vec![Utf8View, Timestamp(Second, None)]),
195                    Exact(vec![
196                        Utf8,
197                        Timestamp(Second, Some(TIMEZONE_WILDCARD.into())),
198                    ]),
199                    Exact(vec![
200                        Utf8View,
201                        Timestamp(Second, Some(TIMEZONE_WILDCARD.into())),
202                    ]),
203                ],
204                Volatility::Immutable,
205            ),
206            aliases: vec![String::from("datetrunc")],
207        }
208    }
209}
210
211impl ScalarUDFImpl for DateTruncFunc {
212    fn as_any(&self) -> &dyn Any {
213        self
214    }
215
216    fn name(&self) -> &str {
217        "date_trunc"
218    }
219
220    fn signature(&self) -> &Signature {
221        &self.signature
222    }
223
224    fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
225        match &arg_types[1] {
226            Timestamp(Nanosecond, None) | Utf8 | DataType::Date32 | Null => {
227                Ok(Timestamp(Nanosecond, None))
228            }
229            Timestamp(Nanosecond, tz_opt) => Ok(Timestamp(Nanosecond, tz_opt.clone())),
230            Timestamp(Microsecond, tz_opt) => Ok(Timestamp(Microsecond, tz_opt.clone())),
231            Timestamp(Millisecond, tz_opt) => Ok(Timestamp(Millisecond, tz_opt.clone())),
232            Timestamp(Second, tz_opt) => Ok(Timestamp(Second, tz_opt.clone())),
233            _ => plan_err!(
234                "The date_trunc function can only accept timestamp as the second arg."
235            ),
236        }
237    }
238
239    fn invoke_with_args(
240        &self,
241        args: datafusion_expr::ScalarFunctionArgs,
242    ) -> Result<ColumnarValue> {
243        let args = args.args;
244        let (granularity, array) = (&args[0], &args[1]);
245
246        let granularity_str = if let ColumnarValue::Scalar(ScalarValue::Utf8(Some(v))) =
247            granularity
248        {
249            v.to_lowercase()
250        } else if let ColumnarValue::Scalar(ScalarValue::Utf8View(Some(v))) = granularity
251        {
252            v.to_lowercase()
253        } else {
254            return exec_err!("Granularity of `date_trunc` must be non-null scalar Utf8");
255        };
256
257        let granularity = DateTruncGranularity::from_str(&granularity_str)?;
258
259        fn process_array<T: ArrowTimestampType>(
260            array: &dyn Array,
261            granularity: DateTruncGranularity,
262            tz_opt: &Option<Arc<str>>,
263        ) -> Result<ColumnarValue> {
264            let parsed_tz = parse_tz(tz_opt)?;
265            let array = as_primitive_array::<T>(array)?;
266
267            // fast path for fine granularity
268            // For modern timezones, it's correct to truncate "minute" in this way.
269            // Both datafusion and arrow are ignoring historical timezone's non-minute granularity
270            // bias (e.g., Asia/Kathmandu before 1919 is UTC+05:41:16).
271            // In UTC, "hour" and "day" have uniform durations and can be truncated with simple arithmetic
272            if granularity.is_fine_granularity()
273                || (parsed_tz.is_none() && granularity.is_fine_granularity_utc())
274            {
275                let result = general_date_trunc_array_fine_granularity(
276                    T::UNIT,
277                    array,
278                    granularity,
279                    tz_opt.clone(),
280                )?;
281                return Ok(ColumnarValue::Array(result));
282            }
283
284            let array: PrimitiveArray<T> = array
285                .try_unary(|x| general_date_trunc(T::UNIT, x, parsed_tz, granularity))?
286                .with_timezone_opt(tz_opt.clone());
287            Ok(ColumnarValue::Array(Arc::new(array)))
288        }
289
290        fn process_scalar<T: ArrowTimestampType>(
291            v: &Option<i64>,
292            granularity: DateTruncGranularity,
293            tz_opt: &Option<Arc<str>>,
294        ) -> Result<ColumnarValue> {
295            let parsed_tz = parse_tz(tz_opt)?;
296            let value = if let Some(v) = v {
297                Some(general_date_trunc(T::UNIT, *v, parsed_tz, granularity)?)
298            } else {
299                None
300            };
301            let value = ScalarValue::new_timestamp::<T>(value, tz_opt.clone());
302            Ok(ColumnarValue::Scalar(value))
303        }
304
305        Ok(match array {
306            ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(v, tz_opt)) => {
307                process_scalar::<TimestampNanosecondType>(v, granularity, tz_opt)?
308            }
309            ColumnarValue::Scalar(ScalarValue::TimestampMicrosecond(v, tz_opt)) => {
310                process_scalar::<TimestampMicrosecondType>(v, granularity, tz_opt)?
311            }
312            ColumnarValue::Scalar(ScalarValue::TimestampMillisecond(v, tz_opt)) => {
313                process_scalar::<TimestampMillisecondType>(v, granularity, tz_opt)?
314            }
315            ColumnarValue::Scalar(ScalarValue::TimestampSecond(v, tz_opt)) => {
316                process_scalar::<TimestampSecondType>(v, granularity, tz_opt)?
317            }
318            ColumnarValue::Array(array) => {
319                let array_type = array.data_type();
320                if let Timestamp(unit, tz_opt) = array_type {
321                    match unit {
322                        Second => process_array::<TimestampSecondType>(
323                            array,
324                            granularity,
325                            tz_opt,
326                        )?,
327                        Millisecond => process_array::<TimestampMillisecondType>(
328                            array,
329                            granularity,
330                            tz_opt,
331                        )?,
332                        Microsecond => process_array::<TimestampMicrosecondType>(
333                            array,
334                            granularity,
335                            tz_opt,
336                        )?,
337                        Nanosecond => process_array::<TimestampNanosecondType>(
338                            array,
339                            granularity,
340                            tz_opt,
341                        )?,
342                    }
343                } else {
344                    return exec_err!(
345                        "second argument of `date_trunc` is an unsupported array type: {array_type}"
346                    );
347                }
348            }
349            _ => {
350                return exec_err!(
351                    "second argument of `date_trunc` must be timestamp scalar or array"
352                );
353            }
354        })
355    }
356
357    fn aliases(&self) -> &[String] {
358        &self.aliases
359    }
360
361    fn output_ordering(&self, input: &[ExprProperties]) -> Result<SortProperties> {
362        // The DATE_TRUNC function preserves the order of its second argument.
363        let precision = &input[0];
364        let date_value = &input[1];
365
366        if precision.sort_properties.eq(&SortProperties::Singleton) {
367            Ok(date_value.sort_properties)
368        } else {
369            Ok(SortProperties::Unordered)
370        }
371    }
372    fn documentation(&self) -> Option<&Documentation> {
373        self.doc()
374    }
375}
376
377fn _date_trunc_coarse<T>(
378    granularity: DateTruncGranularity,
379    value: Option<T>,
380) -> Result<Option<T>>
381where
382    T: Datelike + Timelike + Sub<Duration, Output = T> + Copy,
383{
384    let value = match granularity {
385        DateTruncGranularity::Millisecond => value,
386        DateTruncGranularity::Microsecond => value,
387        DateTruncGranularity::Second => value.and_then(|d| d.with_nanosecond(0)),
388        DateTruncGranularity::Minute => value
389            .and_then(|d| d.with_nanosecond(0))
390            .and_then(|d| d.with_second(0)),
391        DateTruncGranularity::Hour => value
392            .and_then(|d| d.with_nanosecond(0))
393            .and_then(|d| d.with_second(0))
394            .and_then(|d| d.with_minute(0)),
395        DateTruncGranularity::Day => value
396            .and_then(|d| d.with_nanosecond(0))
397            .and_then(|d| d.with_second(0))
398            .and_then(|d| d.with_minute(0))
399            .and_then(|d| d.with_hour(0)),
400        DateTruncGranularity::Week => value
401            .and_then(|d| d.with_nanosecond(0))
402            .and_then(|d| d.with_second(0))
403            .and_then(|d| d.with_minute(0))
404            .and_then(|d| d.with_hour(0))
405            .map(|d| {
406                d - TimeDelta::try_seconds(60 * 60 * 24 * d.weekday() as i64).unwrap()
407            }),
408        DateTruncGranularity::Month => value
409            .and_then(|d| d.with_nanosecond(0))
410            .and_then(|d| d.with_second(0))
411            .and_then(|d| d.with_minute(0))
412            .and_then(|d| d.with_hour(0))
413            .and_then(|d| d.with_day0(0)),
414        DateTruncGranularity::Quarter => value
415            .and_then(|d| d.with_nanosecond(0))
416            .and_then(|d| d.with_second(0))
417            .and_then(|d| d.with_minute(0))
418            .and_then(|d| d.with_hour(0))
419            .and_then(|d| d.with_day0(0))
420            .and_then(|d| d.with_month(quarter_month(&d))),
421        DateTruncGranularity::Year => value
422            .and_then(|d| d.with_nanosecond(0))
423            .and_then(|d| d.with_second(0))
424            .and_then(|d| d.with_minute(0))
425            .and_then(|d| d.with_hour(0))
426            .and_then(|d| d.with_day0(0))
427            .and_then(|d| d.with_month0(0)),
428    };
429    Ok(value)
430}
431
432fn quarter_month<T>(date: &T) -> u32
433where
434    T: Datelike,
435{
436    1 + 3 * ((date.month() - 1) / 3)
437}
438
439fn _date_trunc_coarse_with_tz(
440    granularity: DateTruncGranularity,
441    value: Option<DateTime<Tz>>,
442) -> Result<Option<i64>> {
443    if let Some(value) = value {
444        let local = value.naive_local();
445        let truncated = _date_trunc_coarse::<NaiveDateTime>(granularity, Some(local))?;
446        let truncated = truncated.and_then(|truncated| {
447            match truncated.and_local_timezone(value.timezone()) {
448                LocalResult::None => {
449                    // This can happen if the date_trunc operation moves the time into
450                    // an hour that doesn't exist due to daylight savings. On known example where
451                    // this can happen is with historic dates in the America/Sao_Paulo time zone.
452                    // To account for this adjust the time by a few hours, convert to local time,
453                    // and then adjust the time back.
454                    truncated
455                        .sub(TimeDelta::try_hours(3).unwrap())
456                        .and_local_timezone(value.timezone())
457                        .single()
458                        .map(|v| v.add(TimeDelta::try_hours(3).unwrap()))
459                }
460                LocalResult::Single(datetime) => Some(datetime),
461                LocalResult::Ambiguous(datetime1, datetime2) => {
462                    // Because we are truncating from an equally or more specific time
463                    // the original time must have been within the ambiguous local time
464                    // period. Therefore the offset of one of these times should match the
465                    // offset of the original time.
466                    if datetime1.offset().fix() == value.offset().fix() {
467                        Some(datetime1)
468                    } else {
469                        Some(datetime2)
470                    }
471                }
472            }
473        });
474        Ok(truncated.and_then(|value| value.timestamp_nanos_opt()))
475    } else {
476        _date_trunc_coarse::<NaiveDateTime>(granularity, None)?;
477        Ok(None)
478    }
479}
480
481fn _date_trunc_coarse_without_tz(
482    granularity: DateTruncGranularity,
483    value: Option<NaiveDateTime>,
484) -> Result<Option<i64>> {
485    let value = _date_trunc_coarse::<NaiveDateTime>(granularity, value)?;
486    Ok(value.and_then(|value| value.and_utc().timestamp_nanos_opt()))
487}
488
489/// Truncates the single `value`, expressed in nanoseconds since the
490/// epoch, for granularities greater than 1 second, in taking into
491/// account that some granularities are not uniform durations of time
492/// (e.g. months are not always the same lengths, leap seconds, etc)
493fn date_trunc_coarse(
494    granularity: DateTruncGranularity,
495    value: i64,
496    tz: Option<Tz>,
497) -> Result<i64> {
498    let value = match tz {
499        Some(tz) => {
500            // Use chrono DateTime<Tz> to clear the various fields because need to clear per timezone,
501            // and NaiveDateTime (ISO 8601) has no concept of timezones
502            let value = as_datetime_with_timezone::<TimestampNanosecondType>(value, tz)
503                .ok_or(exec_datafusion_err!("Timestamp {value} out of range"))?;
504            _date_trunc_coarse_with_tz(granularity, Some(value))
505        }
506        None => {
507            // Use chrono NaiveDateTime to clear the various fields, if we don't have a timezone.
508            let value = timestamp_ns_to_datetime(value)
509                .ok_or_else(|| exec_datafusion_err!("Timestamp {value} out of range"))?;
510            _date_trunc_coarse_without_tz(granularity, Some(value))
511        }
512    }?;
513
514    // `with_x(0)` are infallible because `0` are always a valid
515    Ok(value.unwrap())
516}
517
518/// Fast path for fine granularities (hour and smaller) that can be handled
519/// with simple arithmetic operations without calendar complexity.
520///
521/// This function is timezone-agnostic and should only be used when:
522/// - No timezone is specified in the input, OR
523/// - The granularity is less than hour as hour can be affected by DST transitions in some cases
524fn general_date_trunc_array_fine_granularity<T: ArrowTimestampType>(
525    tu: TimeUnit,
526    array: &PrimitiveArray<T>,
527    granularity: DateTruncGranularity,
528    tz_opt: Option<Arc<str>>,
529) -> Result<ArrayRef> {
530    let unit = match (tu, granularity) {
531        (Second, DateTruncGranularity::Minute) => NonZeroI64::new(60),
532        (Second, DateTruncGranularity::Hour) => NonZeroI64::new(3600),
533        (Second, DateTruncGranularity::Day) => NonZeroI64::new(86400),
534
535        (Millisecond, DateTruncGranularity::Second) => NonZeroI64::new(1_000),
536        (Millisecond, DateTruncGranularity::Minute) => NonZeroI64::new(60_000),
537        (Millisecond, DateTruncGranularity::Hour) => NonZeroI64::new(3_600_000),
538        (Millisecond, DateTruncGranularity::Day) => NonZeroI64::new(86_400_000),
539
540        (Microsecond, DateTruncGranularity::Millisecond) => NonZeroI64::new(1_000),
541        (Microsecond, DateTruncGranularity::Second) => NonZeroI64::new(1_000_000),
542        (Microsecond, DateTruncGranularity::Minute) => NonZeroI64::new(60_000_000),
543        (Microsecond, DateTruncGranularity::Hour) => NonZeroI64::new(3_600_000_000),
544        (Microsecond, DateTruncGranularity::Day) => NonZeroI64::new(86_400_000_000),
545
546        (Nanosecond, DateTruncGranularity::Microsecond) => NonZeroI64::new(1_000),
547        (Nanosecond, DateTruncGranularity::Millisecond) => NonZeroI64::new(1_000_000),
548        (Nanosecond, DateTruncGranularity::Second) => NonZeroI64::new(1_000_000_000),
549        (Nanosecond, DateTruncGranularity::Minute) => NonZeroI64::new(60_000_000_000),
550        (Nanosecond, DateTruncGranularity::Hour) => NonZeroI64::new(3_600_000_000_000),
551        (Nanosecond, DateTruncGranularity::Day) => NonZeroI64::new(86_400_000_000_000),
552        _ => None,
553    };
554
555    if let Some(unit) = unit {
556        let unit = unit.get();
557        let array = PrimitiveArray::<T>::from_iter_values_with_nulls(
558            array
559                .values()
560                .iter()
561                .map(|v| *v - i64::rem_euclid(*v, unit)),
562            array.nulls().cloned(),
563        )
564        .with_timezone_opt(tz_opt);
565        Ok(Arc::new(array))
566    } else {
567        // truncate to the same or smaller unit
568        Ok(Arc::new(array.clone()))
569    }
570}
571
572// truncates a single value with the given timeunit to the specified granularity
573fn general_date_trunc(
574    tu: TimeUnit,
575    value: i64,
576    tz: Option<Tz>,
577    granularity: DateTruncGranularity,
578) -> Result<i64, DataFusionError> {
579    let scale = match tu {
580        Second => 1_000_000_000,
581        Millisecond => 1_000_000,
582        Microsecond => 1_000,
583        Nanosecond => 1,
584    };
585
586    // convert to nanoseconds
587    let nano = date_trunc_coarse(granularity, scale * value, tz)?;
588
589    let result = match tu {
590        Second => match granularity {
591            DateTruncGranularity::Minute => nano / 1_000_000_000 / 60 * 60,
592            _ => nano / 1_000_000_000,
593        },
594        Millisecond => match granularity {
595            DateTruncGranularity::Minute => nano / 1_000_000 / 1_000 / 60 * 1_000 * 60,
596            DateTruncGranularity::Second => nano / 1_000_000 / 1_000 * 1_000,
597            _ => nano / 1_000_000,
598        },
599        Microsecond => match granularity {
600            DateTruncGranularity::Minute => {
601                nano / 1_000 / 1_000_000 / 60 * 60 * 1_000_000
602            }
603            DateTruncGranularity::Second => nano / 1_000 / 1_000_000 * 1_000_000,
604            DateTruncGranularity::Millisecond => nano / 1_000 / 1_000 * 1_000,
605            _ => nano / 1_000,
606        },
607        _ => match granularity {
608            DateTruncGranularity::Minute => {
609                nano / 1_000_000_000 / 60 * 1_000_000_000 * 60
610            }
611            DateTruncGranularity::Second => nano / 1_000_000_000 * 1_000_000_000,
612            DateTruncGranularity::Millisecond => nano / 1_000_000 * 1_000_000,
613            DateTruncGranularity::Microsecond => nano / 1_000 * 1_000,
614            _ => nano,
615        },
616    };
617    Ok(result)
618}
619
620fn parse_tz(tz: &Option<Arc<str>>) -> Result<Option<Tz>> {
621    tz.as_ref()
622        .map(|tz| {
623            Tz::from_str(tz)
624                .map_err(|op| exec_datafusion_err!("failed on timezone {tz}: {op:?}"))
625        })
626        .transpose()
627}
628
629#[cfg(test)]
630mod tests {
631    use std::sync::Arc;
632
633    use crate::datetime::date_trunc::{
634        DateTruncFunc, DateTruncGranularity, date_trunc_coarse,
635    };
636
637    use arrow::array::cast::as_primitive_array;
638    use arrow::array::types::TimestampNanosecondType;
639    use arrow::array::{Array, TimestampNanosecondArray};
640    use arrow::compute::kernels::cast_utils::string_to_timestamp_nanos;
641    use arrow::datatypes::{DataType, Field, TimeUnit};
642    use datafusion_common::ScalarValue;
643    use datafusion_common::config::ConfigOptions;
644    use datafusion_expr::{ColumnarValue, ScalarUDFImpl};
645
646    #[test]
647    fn date_trunc_test() {
648        let cases = vec![
649            (
650                "2020-09-08T13:42:29.190855Z",
651                "second",
652                "2020-09-08T13:42:29.000000Z",
653            ),
654            (
655                "2020-09-08T13:42:29.190855Z",
656                "minute",
657                "2020-09-08T13:42:00.000000Z",
658            ),
659            (
660                "2020-09-08T13:42:29.190855Z",
661                "hour",
662                "2020-09-08T13:00:00.000000Z",
663            ),
664            (
665                "2020-09-08T13:42:29.190855Z",
666                "day",
667                "2020-09-08T00:00:00.000000Z",
668            ),
669            (
670                "2020-09-08T13:42:29.190855Z",
671                "week",
672                "2020-09-07T00:00:00.000000Z",
673            ),
674            (
675                "2020-09-08T13:42:29.190855Z",
676                "month",
677                "2020-09-01T00:00:00.000000Z",
678            ),
679            (
680                "2020-09-08T13:42:29.190855Z",
681                "year",
682                "2020-01-01T00:00:00.000000Z",
683            ),
684            // week
685            (
686                "2021-01-01T13:42:29.190855Z",
687                "week",
688                "2020-12-28T00:00:00.000000Z",
689            ),
690            (
691                "2020-01-01T13:42:29.190855Z",
692                "week",
693                "2019-12-30T00:00:00.000000Z",
694            ),
695            // quarter
696            (
697                "2020-01-01T13:42:29.190855Z",
698                "quarter",
699                "2020-01-01T00:00:00.000000Z",
700            ),
701            (
702                "2020-02-01T13:42:29.190855Z",
703                "quarter",
704                "2020-01-01T00:00:00.000000Z",
705            ),
706            (
707                "2020-03-01T13:42:29.190855Z",
708                "quarter",
709                "2020-01-01T00:00:00.000000Z",
710            ),
711            (
712                "2020-04-01T13:42:29.190855Z",
713                "quarter",
714                "2020-04-01T00:00:00.000000Z",
715            ),
716            (
717                "2020-08-01T13:42:29.190855Z",
718                "quarter",
719                "2020-07-01T00:00:00.000000Z",
720            ),
721            (
722                "2020-11-01T13:42:29.190855Z",
723                "quarter",
724                "2020-10-01T00:00:00.000000Z",
725            ),
726            (
727                "2020-12-01T13:42:29.190855Z",
728                "quarter",
729                "2020-10-01T00:00:00.000000Z",
730            ),
731        ];
732
733        cases.iter().for_each(|(original, granularity, expected)| {
734            let left = string_to_timestamp_nanos(original).unwrap();
735            let right = string_to_timestamp_nanos(expected).unwrap();
736            let granularity_enum = DateTruncGranularity::from_str(granularity).unwrap();
737            let result = date_trunc_coarse(granularity_enum, left, None).unwrap();
738            assert_eq!(result, right, "{original} = {expected}");
739        });
740    }
741
742    #[test]
743    fn test_date_trunc_timezones() {
744        let cases = [
745            (
746                vec![
747                    "2020-09-08T00:00:00Z",
748                    "2020-09-08T01:00:00Z",
749                    "2020-09-08T02:00:00Z",
750                    "2020-09-08T03:00:00Z",
751                    "2020-09-08T04:00:00Z",
752                ],
753                Some("+00".into()),
754                vec![
755                    "2020-09-08T00:00:00Z",
756                    "2020-09-08T00:00:00Z",
757                    "2020-09-08T00:00:00Z",
758                    "2020-09-08T00:00:00Z",
759                    "2020-09-08T00:00:00Z",
760                ],
761            ),
762            (
763                vec![
764                    "2020-09-08T00:00:00Z",
765                    "2020-09-08T01:00:00Z",
766                    "2020-09-08T02:00:00Z",
767                    "2020-09-08T03:00:00Z",
768                    "2020-09-08T04:00:00Z",
769                ],
770                None,
771                vec![
772                    "2020-09-08T00:00:00Z",
773                    "2020-09-08T00:00:00Z",
774                    "2020-09-08T00:00:00Z",
775                    "2020-09-08T00:00:00Z",
776                    "2020-09-08T00:00:00Z",
777                ],
778            ),
779            (
780                vec![
781                    "2020-09-08T00:00:00Z",
782                    "2020-09-08T01:00:00Z",
783                    "2020-09-08T02:00:00Z",
784                    "2020-09-08T03:00:00Z",
785                    "2020-09-08T04:00:00Z",
786                ],
787                Some("-02".into()),
788                vec![
789                    "2020-09-07T02:00:00Z",
790                    "2020-09-07T02:00:00Z",
791                    "2020-09-08T02:00:00Z",
792                    "2020-09-08T02:00:00Z",
793                    "2020-09-08T02:00:00Z",
794                ],
795            ),
796            (
797                vec![
798                    "2020-09-08T00:00:00+05",
799                    "2020-09-08T01:00:00+05",
800                    "2020-09-08T02:00:00+05",
801                    "2020-09-08T03:00:00+05",
802                    "2020-09-08T04:00:00+05",
803                ],
804                Some("+05".into()),
805                vec![
806                    "2020-09-08T00:00:00+05",
807                    "2020-09-08T00:00:00+05",
808                    "2020-09-08T00:00:00+05",
809                    "2020-09-08T00:00:00+05",
810                    "2020-09-08T00:00:00+05",
811                ],
812            ),
813            (
814                vec![
815                    "2020-09-08T00:00:00+08",
816                    "2020-09-08T01:00:00+08",
817                    "2020-09-08T02:00:00+08",
818                    "2020-09-08T03:00:00+08",
819                    "2020-09-08T04:00:00+08",
820                ],
821                Some("+08".into()),
822                vec![
823                    "2020-09-08T00:00:00+08",
824                    "2020-09-08T00:00:00+08",
825                    "2020-09-08T00:00:00+08",
826                    "2020-09-08T00:00:00+08",
827                    "2020-09-08T00:00:00+08",
828                ],
829            ),
830            (
831                vec![
832                    "2024-10-26T23:00:00Z",
833                    "2024-10-27T00:00:00Z",
834                    "2024-10-27T01:00:00Z",
835                    "2024-10-27T02:00:00Z",
836                ],
837                Some("Europe/Berlin".into()),
838                vec![
839                    "2024-10-27T00:00:00+02",
840                    "2024-10-27T00:00:00+02",
841                    "2024-10-27T00:00:00+02",
842                    "2024-10-27T00:00:00+02",
843                ],
844            ),
845            (
846                vec![
847                    "2018-02-18T00:00:00Z",
848                    "2018-02-18T01:00:00Z",
849                    "2018-02-18T02:00:00Z",
850                    "2018-02-18T03:00:00Z",
851                    "2018-11-04T01:00:00Z",
852                    "2018-11-04T02:00:00Z",
853                    "2018-11-04T03:00:00Z",
854                    "2018-11-04T04:00:00Z",
855                ],
856                Some("America/Sao_Paulo".into()),
857                vec![
858                    "2018-02-17T00:00:00-02",
859                    "2018-02-17T00:00:00-02",
860                    "2018-02-17T00:00:00-02",
861                    "2018-02-18T00:00:00-03",
862                    "2018-11-03T00:00:00-03",
863                    "2018-11-03T00:00:00-03",
864                    "2018-11-04T01:00:00-02",
865                    "2018-11-04T01:00:00-02",
866                ],
867            ),
868        ];
869
870        cases.iter().for_each(|(original, tz_opt, expected)| {
871            let input = original
872                .iter()
873                .map(|s| Some(string_to_timestamp_nanos(s).unwrap()))
874                .collect::<TimestampNanosecondArray>()
875                .with_timezone_opt(tz_opt.clone());
876            let right = expected
877                .iter()
878                .map(|s| Some(string_to_timestamp_nanos(s).unwrap()))
879                .collect::<TimestampNanosecondArray>()
880                .with_timezone_opt(tz_opt.clone());
881            let batch_len = input.len();
882            let arg_fields = vec![
883                Field::new("a", DataType::Utf8, false).into(),
884                Field::new("b", input.data_type().clone(), false).into(),
885            ];
886            let args = datafusion_expr::ScalarFunctionArgs {
887                args: vec![
888                    ColumnarValue::Scalar(ScalarValue::from("day")),
889                    ColumnarValue::Array(Arc::new(input)),
890                ],
891                arg_fields,
892                number_rows: batch_len,
893                return_field: Field::new(
894                    "f",
895                    DataType::Timestamp(TimeUnit::Nanosecond, tz_opt.clone()),
896                    true,
897                )
898                .into(),
899                config_options: Arc::new(ConfigOptions::default()),
900            };
901            let result = DateTruncFunc::new().invoke_with_args(args).unwrap();
902            if let ColumnarValue::Array(result) = result {
903                assert_eq!(
904                    result.data_type(),
905                    &DataType::Timestamp(TimeUnit::Nanosecond, tz_opt.clone())
906                );
907                let left = as_primitive_array::<TimestampNanosecondType>(&result);
908                assert_eq!(left, &right);
909            } else {
910                panic!("unexpected column type");
911            }
912        });
913    }
914
915    #[test]
916    fn test_date_trunc_hour_timezones() {
917        let cases = [
918            (
919                vec![
920                    "2020-09-08T00:30:00Z",
921                    "2020-09-08T01:30:00Z",
922                    "2020-09-08T02:30:00Z",
923                    "2020-09-08T03:30:00Z",
924                    "2020-09-08T04:30:00Z",
925                ],
926                Some("+00".into()),
927                vec![
928                    "2020-09-08T00:00:00Z",
929                    "2020-09-08T01:00:00Z",
930                    "2020-09-08T02:00:00Z",
931                    "2020-09-08T03:00:00Z",
932                    "2020-09-08T04:00:00Z",
933                ],
934            ),
935            (
936                vec![
937                    "2020-09-08T00:30:00Z",
938                    "2020-09-08T01:30:00Z",
939                    "2020-09-08T02:30:00Z",
940                    "2020-09-08T03:30:00Z",
941                    "2020-09-08T04:30:00Z",
942                ],
943                None,
944                vec![
945                    "2020-09-08T00:00:00Z",
946                    "2020-09-08T01:00:00Z",
947                    "2020-09-08T02:00:00Z",
948                    "2020-09-08T03:00:00Z",
949                    "2020-09-08T04:00:00Z",
950                ],
951            ),
952            (
953                vec![
954                    "2020-09-08T00:30:00Z",
955                    "2020-09-08T01:30:00Z",
956                    "2020-09-08T02:30:00Z",
957                    "2020-09-08T03:30:00Z",
958                    "2020-09-08T04:30:00Z",
959                ],
960                Some("-02".into()),
961                vec![
962                    "2020-09-08T00:00:00Z",
963                    "2020-09-08T01:00:00Z",
964                    "2020-09-08T02:00:00Z",
965                    "2020-09-08T03:00:00Z",
966                    "2020-09-08T04:00:00Z",
967                ],
968            ),
969            (
970                vec![
971                    "2020-09-08T00:30:00+05",
972                    "2020-09-08T01:30:00+05",
973                    "2020-09-08T02:30:00+05",
974                    "2020-09-08T03:30:00+05",
975                    "2020-09-08T04:30:00+05",
976                ],
977                Some("+05".into()),
978                vec![
979                    "2020-09-08T00:00:00+05",
980                    "2020-09-08T01:00:00+05",
981                    "2020-09-08T02:00:00+05",
982                    "2020-09-08T03:00:00+05",
983                    "2020-09-08T04:00:00+05",
984                ],
985            ),
986            (
987                vec![
988                    "2020-09-08T00:30:00+08",
989                    "2020-09-08T01:30:00+08",
990                    "2020-09-08T02:30:00+08",
991                    "2020-09-08T03:30:00+08",
992                    "2020-09-08T04:30:00+08",
993                ],
994                Some("+08".into()),
995                vec![
996                    "2020-09-08T00:00:00+08",
997                    "2020-09-08T01:00:00+08",
998                    "2020-09-08T02:00:00+08",
999                    "2020-09-08T03:00:00+08",
1000                    "2020-09-08T04:00:00+08",
1001                ],
1002            ),
1003            (
1004                vec![
1005                    "2024-10-26T23:30:00Z",
1006                    "2024-10-27T00:30:00Z",
1007                    "2024-10-27T01:30:00Z",
1008                    "2024-10-27T02:30:00Z",
1009                ],
1010                Some("Europe/Berlin".into()),
1011                vec![
1012                    "2024-10-27T01:00:00+02",
1013                    "2024-10-27T02:00:00+02",
1014                    "2024-10-27T02:00:00+01",
1015                    "2024-10-27T03:00:00+01",
1016                ],
1017            ),
1018            (
1019                vec![
1020                    "2018-02-18T00:30:00Z",
1021                    "2018-02-18T01:30:00Z",
1022                    "2018-02-18T02:30:00Z",
1023                    "2018-02-18T03:30:00Z",
1024                    "2018-11-04T01:00:00Z",
1025                    "2018-11-04T02:00:00Z",
1026                    "2018-11-04T03:00:00Z",
1027                    "2018-11-04T04:00:00Z",
1028                ],
1029                Some("America/Sao_Paulo".into()),
1030                vec![
1031                    "2018-02-17T22:00:00-02",
1032                    "2018-02-17T23:00:00-02",
1033                    "2018-02-17T23:00:00-03",
1034                    "2018-02-18T00:00:00-03",
1035                    "2018-11-03T22:00:00-03",
1036                    "2018-11-03T23:00:00-03",
1037                    "2018-11-04T01:00:00-02",
1038                    "2018-11-04T02:00:00-02",
1039                ],
1040            ),
1041            (
1042                vec![
1043                    "2024-10-26T23:30:00Z",
1044                    "2024-10-27T00:30:00Z",
1045                    "2024-10-27T01:30:00Z",
1046                    "2024-10-27T02:30:00Z",
1047                ],
1048                Some("Asia/Kathmandu".into()), // UTC+5:45
1049                vec![
1050                    "2024-10-27T05:00:00+05:45",
1051                    "2024-10-27T06:00:00+05:45",
1052                    "2024-10-27T07:00:00+05:45",
1053                    "2024-10-27T08:00:00+05:45",
1054                ],
1055            ),
1056        ];
1057
1058        cases.iter().for_each(|(original, tz_opt, expected)| {
1059            let input = original
1060                .iter()
1061                .map(|s| Some(string_to_timestamp_nanos(s).unwrap()))
1062                .collect::<TimestampNanosecondArray>()
1063                .with_timezone_opt(tz_opt.clone());
1064            let right = expected
1065                .iter()
1066                .map(|s| Some(string_to_timestamp_nanos(s).unwrap()))
1067                .collect::<TimestampNanosecondArray>()
1068                .with_timezone_opt(tz_opt.clone());
1069            let batch_len = input.len();
1070            let arg_fields = vec![
1071                Field::new("a", DataType::Utf8, false).into(),
1072                Field::new("b", input.data_type().clone(), false).into(),
1073            ];
1074            let args = datafusion_expr::ScalarFunctionArgs {
1075                args: vec![
1076                    ColumnarValue::Scalar(ScalarValue::from("hour")),
1077                    ColumnarValue::Array(Arc::new(input)),
1078                ],
1079                arg_fields,
1080                number_rows: batch_len,
1081                return_field: Field::new(
1082                    "f",
1083                    DataType::Timestamp(TimeUnit::Nanosecond, tz_opt.clone()),
1084                    true,
1085                )
1086                .into(),
1087                config_options: Arc::new(ConfigOptions::default()),
1088            };
1089            let result = DateTruncFunc::new().invoke_with_args(args).unwrap();
1090            if let ColumnarValue::Array(result) = result {
1091                assert_eq!(
1092                    result.data_type(),
1093                    &DataType::Timestamp(TimeUnit::Nanosecond, tz_opt.clone())
1094                );
1095                let left = as_primitive_array::<TimestampNanosecondType>(&result);
1096                assert_eq!(left, &right);
1097            } else {
1098                panic!("unexpected column type");
1099            }
1100        });
1101    }
1102
1103    #[test]
1104    fn test_date_trunc_fine_granularity_timezones() {
1105        let cases = [
1106            // Test "second" granularity
1107            (
1108                vec![
1109                    "2020-09-08T13:42:29.190855Z",
1110                    "2020-09-08T13:42:30.500000Z",
1111                    "2020-09-08T13:42:31.999999Z",
1112                ],
1113                Some("+00".into()),
1114                "second",
1115                vec![
1116                    "2020-09-08T13:42:29.000000Z",
1117                    "2020-09-08T13:42:30.000000Z",
1118                    "2020-09-08T13:42:31.000000Z",
1119                ],
1120            ),
1121            (
1122                vec![
1123                    "2020-09-08T13:42:29.190855+05",
1124                    "2020-09-08T13:42:30.500000+05",
1125                    "2020-09-08T13:42:31.999999+05",
1126                ],
1127                Some("+05".into()),
1128                "second",
1129                vec![
1130                    "2020-09-08T13:42:29.000000+05",
1131                    "2020-09-08T13:42:30.000000+05",
1132                    "2020-09-08T13:42:31.000000+05",
1133                ],
1134            ),
1135            (
1136                vec![
1137                    "2020-09-08T13:42:29.190855Z",
1138                    "2020-09-08T13:42:30.500000Z",
1139                    "2020-09-08T13:42:31.999999Z",
1140                ],
1141                Some("Europe/Berlin".into()),
1142                "second",
1143                vec![
1144                    "2020-09-08T13:42:29.000000Z",
1145                    "2020-09-08T13:42:30.000000Z",
1146                    "2020-09-08T13:42:31.000000Z",
1147                ],
1148            ),
1149            // Test "minute" granularity
1150            (
1151                vec![
1152                    "2020-09-08T13:42:29.190855Z",
1153                    "2020-09-08T13:43:30.500000Z",
1154                    "2020-09-08T13:44:31.999999Z",
1155                ],
1156                Some("+00".into()),
1157                "minute",
1158                vec![
1159                    "2020-09-08T13:42:00.000000Z",
1160                    "2020-09-08T13:43:00.000000Z",
1161                    "2020-09-08T13:44:00.000000Z",
1162                ],
1163            ),
1164            (
1165                vec![
1166                    "2020-09-08T13:42:29.190855+08",
1167                    "2020-09-08T13:43:30.500000+08",
1168                    "2020-09-08T13:44:31.999999+08",
1169                ],
1170                Some("+08".into()),
1171                "minute",
1172                vec![
1173                    "2020-09-08T13:42:00.000000+08",
1174                    "2020-09-08T13:43:00.000000+08",
1175                    "2020-09-08T13:44:00.000000+08",
1176                ],
1177            ),
1178            (
1179                vec![
1180                    "2020-09-08T13:42:29.190855Z",
1181                    "2020-09-08T13:43:30.500000Z",
1182                    "2020-09-08T13:44:31.999999Z",
1183                ],
1184                Some("America/Sao_Paulo".into()),
1185                "minute",
1186                vec![
1187                    "2020-09-08T13:42:00.000000Z",
1188                    "2020-09-08T13:43:00.000000Z",
1189                    "2020-09-08T13:44:00.000000Z",
1190                ],
1191            ),
1192            // Test with None (no timezone)
1193            (
1194                vec![
1195                    "2020-09-08T13:42:29.190855Z",
1196                    "2020-09-08T13:43:30.500000Z",
1197                    "2020-09-08T13:44:31.999999Z",
1198                ],
1199                None,
1200                "minute",
1201                vec![
1202                    "2020-09-08T13:42:00.000000Z",
1203                    "2020-09-08T13:43:00.000000Z",
1204                    "2020-09-08T13:44:00.000000Z",
1205                ],
1206            ),
1207            // Test millisecond granularity
1208            (
1209                vec![
1210                    "2020-09-08T13:42:29.190855Z",
1211                    "2020-09-08T13:42:29.191999Z",
1212                    "2020-09-08T13:42:29.192500Z",
1213                ],
1214                Some("Asia/Kolkata".into()),
1215                "millisecond",
1216                vec![
1217                    "2020-09-08T19:12:29.190000+05:30",
1218                    "2020-09-08T19:12:29.191000+05:30",
1219                    "2020-09-08T19:12:29.192000+05:30",
1220                ],
1221            ),
1222        ];
1223
1224        cases
1225            .iter()
1226            .for_each(|(original, tz_opt, granularity, expected)| {
1227                let input = original
1228                    .iter()
1229                    .map(|s| Some(string_to_timestamp_nanos(s).unwrap()))
1230                    .collect::<TimestampNanosecondArray>()
1231                    .with_timezone_opt(tz_opt.clone());
1232                let right = expected
1233                    .iter()
1234                    .map(|s| Some(string_to_timestamp_nanos(s).unwrap()))
1235                    .collect::<TimestampNanosecondArray>()
1236                    .with_timezone_opt(tz_opt.clone());
1237                let batch_len = input.len();
1238                let arg_fields = vec![
1239                    Field::new("a", DataType::Utf8, false).into(),
1240                    Field::new("b", input.data_type().clone(), false).into(),
1241                ];
1242                let args = datafusion_expr::ScalarFunctionArgs {
1243                    args: vec![
1244                        ColumnarValue::Scalar(ScalarValue::from(*granularity)),
1245                        ColumnarValue::Array(Arc::new(input)),
1246                    ],
1247                    arg_fields,
1248                    number_rows: batch_len,
1249                    return_field: Field::new(
1250                        "f",
1251                        DataType::Timestamp(TimeUnit::Nanosecond, tz_opt.clone()),
1252                        true,
1253                    )
1254                    .into(),
1255                    config_options: Arc::new(ConfigOptions::default()),
1256                };
1257                let result = DateTruncFunc::new().invoke_with_args(args).unwrap();
1258                if let ColumnarValue::Array(result) = result {
1259                    assert_eq!(
1260                        result.data_type(),
1261                        &DataType::Timestamp(TimeUnit::Nanosecond, tz_opt.clone()),
1262                        "Failed for granularity: {granularity}, timezone: {tz_opt:?}"
1263                    );
1264                    let left = as_primitive_array::<TimestampNanosecondType>(&result);
1265                    assert_eq!(
1266                        left, &right,
1267                        "Failed for granularity: {granularity}, timezone: {tz_opt:?}"
1268                    );
1269                } else {
1270                    panic!("unexpected column type");
1271                }
1272            });
1273    }
1274}