datafusion_functions/datetime/
date_trunc.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use std::any::Any;
19use std::num::NonZeroI64;
20use std::ops::{Add, Sub};
21use std::str::FromStr;
22use std::sync::Arc;
23
24use arrow::array::temporal_conversions::{
25    as_datetime_with_timezone, timestamp_ns_to_datetime,
26};
27use arrow::array::timezone::Tz;
28use arrow::array::types::{
29    ArrowTimestampType, TimestampMicrosecondType, TimestampMillisecondType,
30    TimestampNanosecondType, TimestampSecondType,
31};
32use arrow::array::{Array, ArrayRef, PrimitiveArray};
33use arrow::datatypes::DataType::{self, Null, Timestamp, Utf8, Utf8View};
34use arrow::datatypes::TimeUnit::{self, Microsecond, Millisecond, Nanosecond, Second};
35use datafusion_common::cast::as_primitive_array;
36use datafusion_common::{
37    exec_datafusion_err, exec_err, plan_err, DataFusionError, Result, ScalarValue,
38};
39use datafusion_expr::sort_properties::{ExprProperties, SortProperties};
40use datafusion_expr::TypeSignature::Exact;
41use datafusion_expr::{
42    ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, TIMEZONE_WILDCARD,
43};
44use datafusion_macros::user_doc;
45
46use chrono::{
47    DateTime, Datelike, Duration, LocalResult, NaiveDateTime, Offset, TimeDelta, Timelike,
48};
49
50/// Represents the granularity for date truncation operations
51#[derive(Debug, Clone, Copy, PartialEq, Eq)]
52enum DateTruncGranularity {
53    Microsecond,
54    Millisecond,
55    Second,
56    Minute,
57    Hour,
58    Day,
59    Week,
60    Month,
61    Quarter,
62    Year,
63}
64
65impl DateTruncGranularity {
66    /// List of all supported granularity values
67    /// Cannot use HashMap here as it would require lazy_static or once_cell,
68    /// Rust does not support const HashMap yet.
69    const SUPPORTED_GRANULARITIES: &[&str] = &[
70        "microsecond",
71        "millisecond",
72        "second",
73        "minute",
74        "hour",
75        "day",
76        "week",
77        "month",
78        "quarter",
79        "year",
80    ];
81
82    /// Parse a granularity string into a DateTruncGranularity enum
83    fn from_str(s: &str) -> Result<Self> {
84        // Using match for O(1) lookup - compiler optimizes this into a jump table or perfect hash
85        match s.to_lowercase().as_str() {
86            "microsecond" => Ok(Self::Microsecond),
87            "millisecond" => Ok(Self::Millisecond),
88            "second" => Ok(Self::Second),
89            "minute" => Ok(Self::Minute),
90            "hour" => Ok(Self::Hour),
91            "day" => Ok(Self::Day),
92            "week" => Ok(Self::Week),
93            "month" => Ok(Self::Month),
94            "quarter" => Ok(Self::Quarter),
95            "year" => Ok(Self::Year),
96            _ => {
97                let supported = Self::SUPPORTED_GRANULARITIES.join(", ");
98                exec_err!(
99                    "Unsupported date_trunc granularity: '{s}'. Supported values are: {supported}"
100                )
101            }
102        }
103    }
104
105    /// Returns true if this granularity can be handled with simple arithmetic
106    /// (fine granularity: second, minute, millisecond, microsecond)
107    fn is_fine_granularity(&self) -> bool {
108        matches!(
109            self,
110            Self::Second | Self::Minute | Self::Millisecond | Self::Microsecond
111        )
112    }
113
114    /// Returns true if this granularity can be handled with simple arithmetic in UTC
115    /// (hour and day in addition to fine granularities)
116    fn is_fine_granularity_utc(&self) -> bool {
117        self.is_fine_granularity() || matches!(self, Self::Hour | Self::Day)
118    }
119}
120
121#[user_doc(
122    doc_section(label = "Time and Date Functions"),
123    description = "Truncates a timestamp value to a specified precision.",
124    syntax_example = "date_trunc(precision, expression)",
125    argument(
126        name = "precision",
127        description = r#"Time precision to truncate to. The following precisions are supported:
128
129    - year / YEAR
130    - quarter / QUARTER
131    - month / MONTH
132    - week / WEEK
133    - day / DAY
134    - hour / HOUR
135    - minute / MINUTE
136    - second / SECOND
137    - millisecond / MILLISECOND
138    - microsecond / MICROSECOND
139"#
140    ),
141    argument(
142        name = "expression",
143        description = "Time expression to operate on. Can be a constant, column, or function."
144    )
145)]
146#[derive(Debug, PartialEq, Eq, Hash)]
147pub struct DateTruncFunc {
148    signature: Signature,
149    aliases: Vec<String>,
150}
151
152impl Default for DateTruncFunc {
153    fn default() -> Self {
154        Self::new()
155    }
156}
157
158impl DateTruncFunc {
159    pub fn new() -> Self {
160        Self {
161            signature: Signature::one_of(
162                vec![
163                    Exact(vec![Utf8, Timestamp(Nanosecond, None)]),
164                    Exact(vec![Utf8View, Timestamp(Nanosecond, None)]),
165                    Exact(vec![
166                        Utf8,
167                        Timestamp(Nanosecond, Some(TIMEZONE_WILDCARD.into())),
168                    ]),
169                    Exact(vec![
170                        Utf8View,
171                        Timestamp(Nanosecond, Some(TIMEZONE_WILDCARD.into())),
172                    ]),
173                    Exact(vec![Utf8, Timestamp(Microsecond, None)]),
174                    Exact(vec![Utf8View, Timestamp(Microsecond, None)]),
175                    Exact(vec![
176                        Utf8,
177                        Timestamp(Microsecond, Some(TIMEZONE_WILDCARD.into())),
178                    ]),
179                    Exact(vec![
180                        Utf8View,
181                        Timestamp(Microsecond, Some(TIMEZONE_WILDCARD.into())),
182                    ]),
183                    Exact(vec![Utf8, Timestamp(Millisecond, None)]),
184                    Exact(vec![Utf8View, Timestamp(Millisecond, None)]),
185                    Exact(vec![
186                        Utf8,
187                        Timestamp(Millisecond, Some(TIMEZONE_WILDCARD.into())),
188                    ]),
189                    Exact(vec![
190                        Utf8View,
191                        Timestamp(Millisecond, Some(TIMEZONE_WILDCARD.into())),
192                    ]),
193                    Exact(vec![Utf8, Timestamp(Second, None)]),
194                    Exact(vec![Utf8View, Timestamp(Second, None)]),
195                    Exact(vec![
196                        Utf8,
197                        Timestamp(Second, Some(TIMEZONE_WILDCARD.into())),
198                    ]),
199                    Exact(vec![
200                        Utf8View,
201                        Timestamp(Second, Some(TIMEZONE_WILDCARD.into())),
202                    ]),
203                ],
204                Volatility::Immutable,
205            ),
206            aliases: vec![String::from("datetrunc")],
207        }
208    }
209}
210
211impl ScalarUDFImpl for DateTruncFunc {
212    fn as_any(&self) -> &dyn Any {
213        self
214    }
215
216    fn name(&self) -> &str {
217        "date_trunc"
218    }
219
220    fn signature(&self) -> &Signature {
221        &self.signature
222    }
223
224    fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
225        match &arg_types[1] {
226            Timestamp(Nanosecond, None) | Utf8 | DataType::Date32 | Null => {
227                Ok(Timestamp(Nanosecond, None))
228            }
229            Timestamp(Nanosecond, tz_opt) => Ok(Timestamp(Nanosecond, tz_opt.clone())),
230            Timestamp(Microsecond, tz_opt) => Ok(Timestamp(Microsecond, tz_opt.clone())),
231            Timestamp(Millisecond, tz_opt) => Ok(Timestamp(Millisecond, tz_opt.clone())),
232            Timestamp(Second, tz_opt) => Ok(Timestamp(Second, tz_opt.clone())),
233            _ => plan_err!(
234                "The date_trunc function can only accept timestamp as the second arg."
235            ),
236        }
237    }
238
239    fn invoke_with_args(
240        &self,
241        args: datafusion_expr::ScalarFunctionArgs,
242    ) -> Result<ColumnarValue> {
243        let args = args.args;
244        let (granularity, array) = (&args[0], &args[1]);
245
246        let granularity_str = if let ColumnarValue::Scalar(ScalarValue::Utf8(Some(v))) =
247            granularity
248        {
249            v.to_lowercase()
250        } else if let ColumnarValue::Scalar(ScalarValue::Utf8View(Some(v))) = granularity
251        {
252            v.to_lowercase()
253        } else {
254            return exec_err!("Granularity of `date_trunc` must be non-null scalar Utf8");
255        };
256
257        let granularity = DateTruncGranularity::from_str(&granularity_str)?;
258
259        fn process_array<T: ArrowTimestampType>(
260            array: &dyn Array,
261            granularity: DateTruncGranularity,
262            tz_opt: &Option<Arc<str>>,
263        ) -> Result<ColumnarValue> {
264            let parsed_tz = parse_tz(tz_opt)?;
265            let array = as_primitive_array::<T>(array)?;
266
267            // fast path for fine granularity
268            // For modern timezones, it's correct to truncate "minute" in this way.
269            // Both datafusion and arrow are ignoring historical timezone's non-minute granularity
270            // bias (e.g., Asia/Kathmandu before 1919 is UTC+05:41:16).
271            // In UTC, "hour" and "day" have uniform durations and can be truncated with simple arithmetic
272            if granularity.is_fine_granularity()
273                || (parsed_tz.is_none() && granularity.is_fine_granularity_utc())
274            {
275                let result = general_date_trunc_array_fine_granularity(
276                    T::UNIT,
277                    array,
278                    granularity,
279                    tz_opt.clone(),
280                )?;
281                return Ok(ColumnarValue::Array(result));
282            }
283
284            let array: PrimitiveArray<T> = array
285                .try_unary(|x| general_date_trunc(T::UNIT, x, parsed_tz, granularity))?
286                .with_timezone_opt(tz_opt.clone());
287            Ok(ColumnarValue::Array(Arc::new(array)))
288        }
289
290        fn process_scalar<T: ArrowTimestampType>(
291            v: &Option<i64>,
292            granularity: DateTruncGranularity,
293            tz_opt: &Option<Arc<str>>,
294        ) -> Result<ColumnarValue> {
295            let parsed_tz = parse_tz(tz_opt)?;
296            let value = if let Some(v) = v {
297                Some(general_date_trunc(T::UNIT, *v, parsed_tz, granularity)?)
298            } else {
299                None
300            };
301            let value = ScalarValue::new_timestamp::<T>(value, tz_opt.clone());
302            Ok(ColumnarValue::Scalar(value))
303        }
304
305        Ok(match array {
306            ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(v, tz_opt)) => {
307                process_scalar::<TimestampNanosecondType>(v, granularity, tz_opt)?
308            }
309            ColumnarValue::Scalar(ScalarValue::TimestampMicrosecond(v, tz_opt)) => {
310                process_scalar::<TimestampMicrosecondType>(v, granularity, tz_opt)?
311            }
312            ColumnarValue::Scalar(ScalarValue::TimestampMillisecond(v, tz_opt)) => {
313                process_scalar::<TimestampMillisecondType>(v, granularity, tz_opt)?
314            }
315            ColumnarValue::Scalar(ScalarValue::TimestampSecond(v, tz_opt)) => {
316                process_scalar::<TimestampSecondType>(v, granularity, tz_opt)?
317            }
318            ColumnarValue::Array(array) => {
319                let array_type = array.data_type();
320                if let Timestamp(unit, tz_opt) = array_type {
321                    match unit {
322                        Second => process_array::<TimestampSecondType>(
323                            array,
324                            granularity,
325                            tz_opt,
326                        )?,
327                        Millisecond => process_array::<TimestampMillisecondType>(
328                            array,
329                            granularity,
330                            tz_opt,
331                        )?,
332                        Microsecond => process_array::<TimestampMicrosecondType>(
333                            array,
334                            granularity,
335                            tz_opt,
336                        )?,
337                        Nanosecond => process_array::<TimestampNanosecondType>(
338                            array,
339                            granularity,
340                            tz_opt,
341                        )?,
342                    }
343                } else {
344                    return exec_err!("second argument of `date_trunc` is an unsupported array type: {array_type}");
345                }
346            }
347            _ => {
348                return exec_err!(
349                    "second argument of `date_trunc` must be timestamp scalar or array"
350                );
351            }
352        })
353    }
354
355    fn aliases(&self) -> &[String] {
356        &self.aliases
357    }
358
359    fn output_ordering(&self, input: &[ExprProperties]) -> Result<SortProperties> {
360        // The DATE_TRUNC function preserves the order of its second argument.
361        let precision = &input[0];
362        let date_value = &input[1];
363
364        if precision.sort_properties.eq(&SortProperties::Singleton) {
365            Ok(date_value.sort_properties)
366        } else {
367            Ok(SortProperties::Unordered)
368        }
369    }
370    fn documentation(&self) -> Option<&Documentation> {
371        self.doc()
372    }
373}
374
375fn _date_trunc_coarse<T>(
376    granularity: DateTruncGranularity,
377    value: Option<T>,
378) -> Result<Option<T>>
379where
380    T: Datelike + Timelike + Sub<Duration, Output = T> + Copy,
381{
382    let value = match granularity {
383        DateTruncGranularity::Millisecond => value,
384        DateTruncGranularity::Microsecond => value,
385        DateTruncGranularity::Second => value.and_then(|d| d.with_nanosecond(0)),
386        DateTruncGranularity::Minute => value
387            .and_then(|d| d.with_nanosecond(0))
388            .and_then(|d| d.with_second(0)),
389        DateTruncGranularity::Hour => value
390            .and_then(|d| d.with_nanosecond(0))
391            .and_then(|d| d.with_second(0))
392            .and_then(|d| d.with_minute(0)),
393        DateTruncGranularity::Day => value
394            .and_then(|d| d.with_nanosecond(0))
395            .and_then(|d| d.with_second(0))
396            .and_then(|d| d.with_minute(0))
397            .and_then(|d| d.with_hour(0)),
398        DateTruncGranularity::Week => value
399            .and_then(|d| d.with_nanosecond(0))
400            .and_then(|d| d.with_second(0))
401            .and_then(|d| d.with_minute(0))
402            .and_then(|d| d.with_hour(0))
403            .map(|d| {
404                d - TimeDelta::try_seconds(60 * 60 * 24 * d.weekday() as i64).unwrap()
405            }),
406        DateTruncGranularity::Month => value
407            .and_then(|d| d.with_nanosecond(0))
408            .and_then(|d| d.with_second(0))
409            .and_then(|d| d.with_minute(0))
410            .and_then(|d| d.with_hour(0))
411            .and_then(|d| d.with_day0(0)),
412        DateTruncGranularity::Quarter => value
413            .and_then(|d| d.with_nanosecond(0))
414            .and_then(|d| d.with_second(0))
415            .and_then(|d| d.with_minute(0))
416            .and_then(|d| d.with_hour(0))
417            .and_then(|d| d.with_day0(0))
418            .and_then(|d| d.with_month(quarter_month(&d))),
419        DateTruncGranularity::Year => value
420            .and_then(|d| d.with_nanosecond(0))
421            .and_then(|d| d.with_second(0))
422            .and_then(|d| d.with_minute(0))
423            .and_then(|d| d.with_hour(0))
424            .and_then(|d| d.with_day0(0))
425            .and_then(|d| d.with_month0(0)),
426    };
427    Ok(value)
428}
429
430fn quarter_month<T>(date: &T) -> u32
431where
432    T: Datelike,
433{
434    1 + 3 * ((date.month() - 1) / 3)
435}
436
437fn _date_trunc_coarse_with_tz(
438    granularity: DateTruncGranularity,
439    value: Option<DateTime<Tz>>,
440) -> Result<Option<i64>> {
441    if let Some(value) = value {
442        let local = value.naive_local();
443        let truncated = _date_trunc_coarse::<NaiveDateTime>(granularity, Some(local))?;
444        let truncated = truncated.and_then(|truncated| {
445            match truncated.and_local_timezone(value.timezone()) {
446                LocalResult::None => {
447                    // This can happen if the date_trunc operation moves the time into
448                    // an hour that doesn't exist due to daylight savings. On known example where
449                    // this can happen is with historic dates in the America/Sao_Paulo time zone.
450                    // To account for this adjust the time by a few hours, convert to local time,
451                    // and then adjust the time back.
452                    truncated
453                        .sub(TimeDelta::try_hours(3).unwrap())
454                        .and_local_timezone(value.timezone())
455                        .single()
456                        .map(|v| v.add(TimeDelta::try_hours(3).unwrap()))
457                }
458                LocalResult::Single(datetime) => Some(datetime),
459                LocalResult::Ambiguous(datetime1, datetime2) => {
460                    // Because we are truncating from an equally or more specific time
461                    // the original time must have been within the ambiguous local time
462                    // period. Therefore the offset of one of these times should match the
463                    // offset of the original time.
464                    if datetime1.offset().fix() == value.offset().fix() {
465                        Some(datetime1)
466                    } else {
467                        Some(datetime2)
468                    }
469                }
470            }
471        });
472        Ok(truncated.and_then(|value| value.timestamp_nanos_opt()))
473    } else {
474        _date_trunc_coarse::<NaiveDateTime>(granularity, None)?;
475        Ok(None)
476    }
477}
478
479fn _date_trunc_coarse_without_tz(
480    granularity: DateTruncGranularity,
481    value: Option<NaiveDateTime>,
482) -> Result<Option<i64>> {
483    let value = _date_trunc_coarse::<NaiveDateTime>(granularity, value)?;
484    Ok(value.and_then(|value| value.and_utc().timestamp_nanos_opt()))
485}
486
487/// Truncates the single `value`, expressed in nanoseconds since the
488/// epoch, for granularities greater than 1 second, in taking into
489/// account that some granularities are not uniform durations of time
490/// (e.g. months are not always the same lengths, leap seconds, etc)
491fn date_trunc_coarse(
492    granularity: DateTruncGranularity,
493    value: i64,
494    tz: Option<Tz>,
495) -> Result<i64> {
496    let value = match tz {
497        Some(tz) => {
498            // Use chrono DateTime<Tz> to clear the various fields because need to clear per timezone,
499            // and NaiveDateTime (ISO 8601) has no concept of timezones
500            let value = as_datetime_with_timezone::<TimestampNanosecondType>(value, tz)
501                .ok_or(exec_datafusion_err!("Timestamp {value} out of range"))?;
502            _date_trunc_coarse_with_tz(granularity, Some(value))
503        }
504        None => {
505            // Use chrono NaiveDateTime to clear the various fields, if we don't have a timezone.
506            let value = timestamp_ns_to_datetime(value)
507                .ok_or_else(|| exec_datafusion_err!("Timestamp {value} out of range"))?;
508            _date_trunc_coarse_without_tz(granularity, Some(value))
509        }
510    }?;
511
512    // `with_x(0)` are infallible because `0` are always a valid
513    Ok(value.unwrap())
514}
515
516/// Fast path for fine granularities (hour and smaller) that can be handled
517/// with simple arithmetic operations without calendar complexity.
518///
519/// This function is timezone-agnostic and should only be used when:
520/// - No timezone is specified in the input, OR
521/// - The granularity is less than hour as hour can be affected by DST transitions in some cases
522fn general_date_trunc_array_fine_granularity<T: ArrowTimestampType>(
523    tu: TimeUnit,
524    array: &PrimitiveArray<T>,
525    granularity: DateTruncGranularity,
526    tz_opt: Option<Arc<str>>,
527) -> Result<ArrayRef> {
528    let unit = match (tu, granularity) {
529        (Second, DateTruncGranularity::Minute) => NonZeroI64::new(60),
530        (Second, DateTruncGranularity::Hour) => NonZeroI64::new(3600),
531        (Second, DateTruncGranularity::Day) => NonZeroI64::new(86400),
532
533        (Millisecond, DateTruncGranularity::Second) => NonZeroI64::new(1_000),
534        (Millisecond, DateTruncGranularity::Minute) => NonZeroI64::new(60_000),
535        (Millisecond, DateTruncGranularity::Hour) => NonZeroI64::new(3_600_000),
536        (Millisecond, DateTruncGranularity::Day) => NonZeroI64::new(86_400_000),
537
538        (Microsecond, DateTruncGranularity::Millisecond) => NonZeroI64::new(1_000),
539        (Microsecond, DateTruncGranularity::Second) => NonZeroI64::new(1_000_000),
540        (Microsecond, DateTruncGranularity::Minute) => NonZeroI64::new(60_000_000),
541        (Microsecond, DateTruncGranularity::Hour) => NonZeroI64::new(3_600_000_000),
542        (Microsecond, DateTruncGranularity::Day) => NonZeroI64::new(86_400_000_000),
543
544        (Nanosecond, DateTruncGranularity::Microsecond) => NonZeroI64::new(1_000),
545        (Nanosecond, DateTruncGranularity::Millisecond) => NonZeroI64::new(1_000_000),
546        (Nanosecond, DateTruncGranularity::Second) => NonZeroI64::new(1_000_000_000),
547        (Nanosecond, DateTruncGranularity::Minute) => NonZeroI64::new(60_000_000_000),
548        (Nanosecond, DateTruncGranularity::Hour) => NonZeroI64::new(3_600_000_000_000),
549        (Nanosecond, DateTruncGranularity::Day) => NonZeroI64::new(86_400_000_000_000),
550        _ => None,
551    };
552
553    if let Some(unit) = unit {
554        let unit = unit.get();
555        let array = PrimitiveArray::<T>::from_iter_values_with_nulls(
556            array
557                .values()
558                .iter()
559                .map(|v| *v - i64::rem_euclid(*v, unit)),
560            array.nulls().cloned(),
561        )
562        .with_timezone_opt(tz_opt);
563        Ok(Arc::new(array))
564    } else {
565        // truncate to the same or smaller unit
566        Ok(Arc::new(array.clone()))
567    }
568}
569
570// truncates a single value with the given timeunit to the specified granularity
571fn general_date_trunc(
572    tu: TimeUnit,
573    value: i64,
574    tz: Option<Tz>,
575    granularity: DateTruncGranularity,
576) -> Result<i64, DataFusionError> {
577    let scale = match tu {
578        Second => 1_000_000_000,
579        Millisecond => 1_000_000,
580        Microsecond => 1_000,
581        Nanosecond => 1,
582    };
583
584    // convert to nanoseconds
585    let nano = date_trunc_coarse(granularity, scale * value, tz)?;
586
587    let result = match tu {
588        Second => match granularity {
589            DateTruncGranularity::Minute => nano / 1_000_000_000 / 60 * 60,
590            _ => nano / 1_000_000_000,
591        },
592        Millisecond => match granularity {
593            DateTruncGranularity::Minute => nano / 1_000_000 / 1_000 / 60 * 1_000 * 60,
594            DateTruncGranularity::Second => nano / 1_000_000 / 1_000 * 1_000,
595            _ => nano / 1_000_000,
596        },
597        Microsecond => match granularity {
598            DateTruncGranularity::Minute => {
599                nano / 1_000 / 1_000_000 / 60 * 60 * 1_000_000
600            }
601            DateTruncGranularity::Second => nano / 1_000 / 1_000_000 * 1_000_000,
602            DateTruncGranularity::Millisecond => nano / 1_000 / 1_000 * 1_000,
603            _ => nano / 1_000,
604        },
605        _ => match granularity {
606            DateTruncGranularity::Minute => {
607                nano / 1_000_000_000 / 60 * 1_000_000_000 * 60
608            }
609            DateTruncGranularity::Second => nano / 1_000_000_000 * 1_000_000_000,
610            DateTruncGranularity::Millisecond => nano / 1_000_000 * 1_000_000,
611            DateTruncGranularity::Microsecond => nano / 1_000 * 1_000,
612            _ => nano,
613        },
614    };
615    Ok(result)
616}
617
618fn parse_tz(tz: &Option<Arc<str>>) -> Result<Option<Tz>> {
619    tz.as_ref()
620        .map(|tz| {
621            Tz::from_str(tz)
622                .map_err(|op| exec_datafusion_err!("failed on timezone {tz}: {op:?}"))
623        })
624        .transpose()
625}
626
627#[cfg(test)]
628mod tests {
629    use std::sync::Arc;
630
631    use crate::datetime::date_trunc::{
632        date_trunc_coarse, DateTruncFunc, DateTruncGranularity,
633    };
634
635    use arrow::array::cast::as_primitive_array;
636    use arrow::array::types::TimestampNanosecondType;
637    use arrow::array::{Array, TimestampNanosecondArray};
638    use arrow::compute::kernels::cast_utils::string_to_timestamp_nanos;
639    use arrow::datatypes::{DataType, Field, TimeUnit};
640    use datafusion_common::config::ConfigOptions;
641    use datafusion_common::ScalarValue;
642    use datafusion_expr::{ColumnarValue, ScalarUDFImpl};
643
644    #[test]
645    fn date_trunc_test() {
646        let cases = vec![
647            (
648                "2020-09-08T13:42:29.190855Z",
649                "second",
650                "2020-09-08T13:42:29.000000Z",
651            ),
652            (
653                "2020-09-08T13:42:29.190855Z",
654                "minute",
655                "2020-09-08T13:42:00.000000Z",
656            ),
657            (
658                "2020-09-08T13:42:29.190855Z",
659                "hour",
660                "2020-09-08T13:00:00.000000Z",
661            ),
662            (
663                "2020-09-08T13:42:29.190855Z",
664                "day",
665                "2020-09-08T00:00:00.000000Z",
666            ),
667            (
668                "2020-09-08T13:42:29.190855Z",
669                "week",
670                "2020-09-07T00:00:00.000000Z",
671            ),
672            (
673                "2020-09-08T13:42:29.190855Z",
674                "month",
675                "2020-09-01T00:00:00.000000Z",
676            ),
677            (
678                "2020-09-08T13:42:29.190855Z",
679                "year",
680                "2020-01-01T00:00:00.000000Z",
681            ),
682            // week
683            (
684                "2021-01-01T13:42:29.190855Z",
685                "week",
686                "2020-12-28T00:00:00.000000Z",
687            ),
688            (
689                "2020-01-01T13:42:29.190855Z",
690                "week",
691                "2019-12-30T00:00:00.000000Z",
692            ),
693            // quarter
694            (
695                "2020-01-01T13:42:29.190855Z",
696                "quarter",
697                "2020-01-01T00:00:00.000000Z",
698            ),
699            (
700                "2020-02-01T13:42:29.190855Z",
701                "quarter",
702                "2020-01-01T00:00:00.000000Z",
703            ),
704            (
705                "2020-03-01T13:42:29.190855Z",
706                "quarter",
707                "2020-01-01T00:00:00.000000Z",
708            ),
709            (
710                "2020-04-01T13:42:29.190855Z",
711                "quarter",
712                "2020-04-01T00:00:00.000000Z",
713            ),
714            (
715                "2020-08-01T13:42:29.190855Z",
716                "quarter",
717                "2020-07-01T00:00:00.000000Z",
718            ),
719            (
720                "2020-11-01T13:42:29.190855Z",
721                "quarter",
722                "2020-10-01T00:00:00.000000Z",
723            ),
724            (
725                "2020-12-01T13:42:29.190855Z",
726                "quarter",
727                "2020-10-01T00:00:00.000000Z",
728            ),
729        ];
730
731        cases.iter().for_each(|(original, granularity, expected)| {
732            let left = string_to_timestamp_nanos(original).unwrap();
733            let right = string_to_timestamp_nanos(expected).unwrap();
734            let granularity_enum = DateTruncGranularity::from_str(granularity).unwrap();
735            let result = date_trunc_coarse(granularity_enum, left, None).unwrap();
736            assert_eq!(result, right, "{original} = {expected}");
737        });
738    }
739
740    #[test]
741    fn test_date_trunc_timezones() {
742        let cases = [
743            (
744                vec![
745                    "2020-09-08T00:00:00Z",
746                    "2020-09-08T01:00:00Z",
747                    "2020-09-08T02:00:00Z",
748                    "2020-09-08T03:00:00Z",
749                    "2020-09-08T04:00:00Z",
750                ],
751                Some("+00".into()),
752                vec![
753                    "2020-09-08T00:00:00Z",
754                    "2020-09-08T00:00:00Z",
755                    "2020-09-08T00:00:00Z",
756                    "2020-09-08T00:00:00Z",
757                    "2020-09-08T00:00:00Z",
758                ],
759            ),
760            (
761                vec![
762                    "2020-09-08T00:00:00Z",
763                    "2020-09-08T01:00:00Z",
764                    "2020-09-08T02:00:00Z",
765                    "2020-09-08T03:00:00Z",
766                    "2020-09-08T04:00:00Z",
767                ],
768                None,
769                vec![
770                    "2020-09-08T00:00:00Z",
771                    "2020-09-08T00:00:00Z",
772                    "2020-09-08T00:00:00Z",
773                    "2020-09-08T00:00:00Z",
774                    "2020-09-08T00:00:00Z",
775                ],
776            ),
777            (
778                vec![
779                    "2020-09-08T00:00:00Z",
780                    "2020-09-08T01:00:00Z",
781                    "2020-09-08T02:00:00Z",
782                    "2020-09-08T03:00:00Z",
783                    "2020-09-08T04:00:00Z",
784                ],
785                Some("-02".into()),
786                vec![
787                    "2020-09-07T02:00:00Z",
788                    "2020-09-07T02:00:00Z",
789                    "2020-09-08T02:00:00Z",
790                    "2020-09-08T02:00:00Z",
791                    "2020-09-08T02:00:00Z",
792                ],
793            ),
794            (
795                vec![
796                    "2020-09-08T00:00:00+05",
797                    "2020-09-08T01:00:00+05",
798                    "2020-09-08T02:00:00+05",
799                    "2020-09-08T03:00:00+05",
800                    "2020-09-08T04:00:00+05",
801                ],
802                Some("+05".into()),
803                vec![
804                    "2020-09-08T00:00:00+05",
805                    "2020-09-08T00:00:00+05",
806                    "2020-09-08T00:00:00+05",
807                    "2020-09-08T00:00:00+05",
808                    "2020-09-08T00:00:00+05",
809                ],
810            ),
811            (
812                vec![
813                    "2020-09-08T00:00:00+08",
814                    "2020-09-08T01:00:00+08",
815                    "2020-09-08T02:00:00+08",
816                    "2020-09-08T03:00:00+08",
817                    "2020-09-08T04:00:00+08",
818                ],
819                Some("+08".into()),
820                vec![
821                    "2020-09-08T00:00:00+08",
822                    "2020-09-08T00:00:00+08",
823                    "2020-09-08T00:00:00+08",
824                    "2020-09-08T00:00:00+08",
825                    "2020-09-08T00:00:00+08",
826                ],
827            ),
828            (
829                vec![
830                    "2024-10-26T23:00:00Z",
831                    "2024-10-27T00:00:00Z",
832                    "2024-10-27T01:00:00Z",
833                    "2024-10-27T02:00:00Z",
834                ],
835                Some("Europe/Berlin".into()),
836                vec![
837                    "2024-10-27T00:00:00+02",
838                    "2024-10-27T00:00:00+02",
839                    "2024-10-27T00:00:00+02",
840                    "2024-10-27T00:00:00+02",
841                ],
842            ),
843            (
844                vec![
845                    "2018-02-18T00:00:00Z",
846                    "2018-02-18T01:00:00Z",
847                    "2018-02-18T02:00:00Z",
848                    "2018-02-18T03:00:00Z",
849                    "2018-11-04T01:00:00Z",
850                    "2018-11-04T02:00:00Z",
851                    "2018-11-04T03:00:00Z",
852                    "2018-11-04T04:00:00Z",
853                ],
854                Some("America/Sao_Paulo".into()),
855                vec![
856                    "2018-02-17T00:00:00-02",
857                    "2018-02-17T00:00:00-02",
858                    "2018-02-17T00:00:00-02",
859                    "2018-02-18T00:00:00-03",
860                    "2018-11-03T00:00:00-03",
861                    "2018-11-03T00:00:00-03",
862                    "2018-11-04T01:00:00-02",
863                    "2018-11-04T01:00:00-02",
864                ],
865            ),
866        ];
867
868        cases.iter().for_each(|(original, tz_opt, expected)| {
869            let input = original
870                .iter()
871                .map(|s| Some(string_to_timestamp_nanos(s).unwrap()))
872                .collect::<TimestampNanosecondArray>()
873                .with_timezone_opt(tz_opt.clone());
874            let right = expected
875                .iter()
876                .map(|s| Some(string_to_timestamp_nanos(s).unwrap()))
877                .collect::<TimestampNanosecondArray>()
878                .with_timezone_opt(tz_opt.clone());
879            let batch_len = input.len();
880            let arg_fields = vec![
881                Field::new("a", DataType::Utf8, false).into(),
882                Field::new("b", input.data_type().clone(), false).into(),
883            ];
884            let args = datafusion_expr::ScalarFunctionArgs {
885                args: vec![
886                    ColumnarValue::Scalar(ScalarValue::from("day")),
887                    ColumnarValue::Array(Arc::new(input)),
888                ],
889                arg_fields,
890                number_rows: batch_len,
891                return_field: Field::new(
892                    "f",
893                    DataType::Timestamp(TimeUnit::Nanosecond, tz_opt.clone()),
894                    true,
895                )
896                .into(),
897                config_options: Arc::new(ConfigOptions::default()),
898            };
899            let result = DateTruncFunc::new().invoke_with_args(args).unwrap();
900            if let ColumnarValue::Array(result) = result {
901                assert_eq!(
902                    result.data_type(),
903                    &DataType::Timestamp(TimeUnit::Nanosecond, tz_opt.clone())
904                );
905                let left = as_primitive_array::<TimestampNanosecondType>(&result);
906                assert_eq!(left, &right);
907            } else {
908                panic!("unexpected column type");
909            }
910        });
911    }
912
913    #[test]
914    fn test_date_trunc_hour_timezones() {
915        let cases = [
916            (
917                vec![
918                    "2020-09-08T00:30:00Z",
919                    "2020-09-08T01:30:00Z",
920                    "2020-09-08T02:30:00Z",
921                    "2020-09-08T03:30:00Z",
922                    "2020-09-08T04:30:00Z",
923                ],
924                Some("+00".into()),
925                vec![
926                    "2020-09-08T00:00:00Z",
927                    "2020-09-08T01:00:00Z",
928                    "2020-09-08T02:00:00Z",
929                    "2020-09-08T03:00:00Z",
930                    "2020-09-08T04:00:00Z",
931                ],
932            ),
933            (
934                vec![
935                    "2020-09-08T00:30:00Z",
936                    "2020-09-08T01:30:00Z",
937                    "2020-09-08T02:30:00Z",
938                    "2020-09-08T03:30:00Z",
939                    "2020-09-08T04:30:00Z",
940                ],
941                None,
942                vec![
943                    "2020-09-08T00:00:00Z",
944                    "2020-09-08T01:00:00Z",
945                    "2020-09-08T02:00:00Z",
946                    "2020-09-08T03:00:00Z",
947                    "2020-09-08T04:00:00Z",
948                ],
949            ),
950            (
951                vec![
952                    "2020-09-08T00:30:00Z",
953                    "2020-09-08T01:30:00Z",
954                    "2020-09-08T02:30:00Z",
955                    "2020-09-08T03:30:00Z",
956                    "2020-09-08T04:30:00Z",
957                ],
958                Some("-02".into()),
959                vec![
960                    "2020-09-08T00:00:00Z",
961                    "2020-09-08T01:00:00Z",
962                    "2020-09-08T02:00:00Z",
963                    "2020-09-08T03:00:00Z",
964                    "2020-09-08T04:00:00Z",
965                ],
966            ),
967            (
968                vec![
969                    "2020-09-08T00:30:00+05",
970                    "2020-09-08T01:30:00+05",
971                    "2020-09-08T02:30:00+05",
972                    "2020-09-08T03:30:00+05",
973                    "2020-09-08T04:30:00+05",
974                ],
975                Some("+05".into()),
976                vec![
977                    "2020-09-08T00:00:00+05",
978                    "2020-09-08T01:00:00+05",
979                    "2020-09-08T02:00:00+05",
980                    "2020-09-08T03:00:00+05",
981                    "2020-09-08T04:00:00+05",
982                ],
983            ),
984            (
985                vec![
986                    "2020-09-08T00:30:00+08",
987                    "2020-09-08T01:30:00+08",
988                    "2020-09-08T02:30:00+08",
989                    "2020-09-08T03:30:00+08",
990                    "2020-09-08T04:30:00+08",
991                ],
992                Some("+08".into()),
993                vec![
994                    "2020-09-08T00:00:00+08",
995                    "2020-09-08T01:00:00+08",
996                    "2020-09-08T02:00:00+08",
997                    "2020-09-08T03:00:00+08",
998                    "2020-09-08T04:00:00+08",
999                ],
1000            ),
1001            (
1002                vec![
1003                    "2024-10-26T23:30:00Z",
1004                    "2024-10-27T00:30:00Z",
1005                    "2024-10-27T01:30:00Z",
1006                    "2024-10-27T02:30:00Z",
1007                ],
1008                Some("Europe/Berlin".into()),
1009                vec![
1010                    "2024-10-27T01:00:00+02",
1011                    "2024-10-27T02:00:00+02",
1012                    "2024-10-27T02:00:00+01",
1013                    "2024-10-27T03:00:00+01",
1014                ],
1015            ),
1016            (
1017                vec![
1018                    "2018-02-18T00:30:00Z",
1019                    "2018-02-18T01:30:00Z",
1020                    "2018-02-18T02:30:00Z",
1021                    "2018-02-18T03:30:00Z",
1022                    "2018-11-04T01:00:00Z",
1023                    "2018-11-04T02:00:00Z",
1024                    "2018-11-04T03:00:00Z",
1025                    "2018-11-04T04:00:00Z",
1026                ],
1027                Some("America/Sao_Paulo".into()),
1028                vec![
1029                    "2018-02-17T22:00:00-02",
1030                    "2018-02-17T23:00:00-02",
1031                    "2018-02-17T23:00:00-03",
1032                    "2018-02-18T00:00:00-03",
1033                    "2018-11-03T22:00:00-03",
1034                    "2018-11-03T23:00:00-03",
1035                    "2018-11-04T01:00:00-02",
1036                    "2018-11-04T02:00:00-02",
1037                ],
1038            ),
1039            (
1040                vec![
1041                    "2024-10-26T23:30:00Z",
1042                    "2024-10-27T00:30:00Z",
1043                    "2024-10-27T01:30:00Z",
1044                    "2024-10-27T02:30:00Z",
1045                ],
1046                Some("Asia/Kathmandu".into()), // UTC+5:45
1047                vec![
1048                    "2024-10-27T05:00:00+05:45",
1049                    "2024-10-27T06:00:00+05:45",
1050                    "2024-10-27T07:00:00+05:45",
1051                    "2024-10-27T08:00:00+05:45",
1052                ],
1053            ),
1054        ];
1055
1056        cases.iter().for_each(|(original, tz_opt, expected)| {
1057            let input = original
1058                .iter()
1059                .map(|s| Some(string_to_timestamp_nanos(s).unwrap()))
1060                .collect::<TimestampNanosecondArray>()
1061                .with_timezone_opt(tz_opt.clone());
1062            let right = expected
1063                .iter()
1064                .map(|s| Some(string_to_timestamp_nanos(s).unwrap()))
1065                .collect::<TimestampNanosecondArray>()
1066                .with_timezone_opt(tz_opt.clone());
1067            let batch_len = input.len();
1068            let arg_fields = vec![
1069                Field::new("a", DataType::Utf8, false).into(),
1070                Field::new("b", input.data_type().clone(), false).into(),
1071            ];
1072            let args = datafusion_expr::ScalarFunctionArgs {
1073                args: vec![
1074                    ColumnarValue::Scalar(ScalarValue::from("hour")),
1075                    ColumnarValue::Array(Arc::new(input)),
1076                ],
1077                arg_fields,
1078                number_rows: batch_len,
1079                return_field: Field::new(
1080                    "f",
1081                    DataType::Timestamp(TimeUnit::Nanosecond, tz_opt.clone()),
1082                    true,
1083                )
1084                .into(),
1085                config_options: Arc::new(ConfigOptions::default()),
1086            };
1087            let result = DateTruncFunc::new().invoke_with_args(args).unwrap();
1088            if let ColumnarValue::Array(result) = result {
1089                assert_eq!(
1090                    result.data_type(),
1091                    &DataType::Timestamp(TimeUnit::Nanosecond, tz_opt.clone())
1092                );
1093                let left = as_primitive_array::<TimestampNanosecondType>(&result);
1094                assert_eq!(left, &right);
1095            } else {
1096                panic!("unexpected column type");
1097            }
1098        });
1099    }
1100
1101    #[test]
1102    fn test_date_trunc_fine_granularity_timezones() {
1103        let cases = [
1104            // Test "second" granularity
1105            (
1106                vec![
1107                    "2020-09-08T13:42:29.190855Z",
1108                    "2020-09-08T13:42:30.500000Z",
1109                    "2020-09-08T13:42:31.999999Z",
1110                ],
1111                Some("+00".into()),
1112                "second",
1113                vec![
1114                    "2020-09-08T13:42:29.000000Z",
1115                    "2020-09-08T13:42:30.000000Z",
1116                    "2020-09-08T13:42:31.000000Z",
1117                ],
1118            ),
1119            (
1120                vec![
1121                    "2020-09-08T13:42:29.190855+05",
1122                    "2020-09-08T13:42:30.500000+05",
1123                    "2020-09-08T13:42:31.999999+05",
1124                ],
1125                Some("+05".into()),
1126                "second",
1127                vec![
1128                    "2020-09-08T13:42:29.000000+05",
1129                    "2020-09-08T13:42:30.000000+05",
1130                    "2020-09-08T13:42:31.000000+05",
1131                ],
1132            ),
1133            (
1134                vec![
1135                    "2020-09-08T13:42:29.190855Z",
1136                    "2020-09-08T13:42:30.500000Z",
1137                    "2020-09-08T13:42:31.999999Z",
1138                ],
1139                Some("Europe/Berlin".into()),
1140                "second",
1141                vec![
1142                    "2020-09-08T13:42:29.000000Z",
1143                    "2020-09-08T13:42:30.000000Z",
1144                    "2020-09-08T13:42:31.000000Z",
1145                ],
1146            ),
1147            // Test "minute" granularity
1148            (
1149                vec![
1150                    "2020-09-08T13:42:29.190855Z",
1151                    "2020-09-08T13:43:30.500000Z",
1152                    "2020-09-08T13:44:31.999999Z",
1153                ],
1154                Some("+00".into()),
1155                "minute",
1156                vec![
1157                    "2020-09-08T13:42:00.000000Z",
1158                    "2020-09-08T13:43:00.000000Z",
1159                    "2020-09-08T13:44:00.000000Z",
1160                ],
1161            ),
1162            (
1163                vec![
1164                    "2020-09-08T13:42:29.190855+08",
1165                    "2020-09-08T13:43:30.500000+08",
1166                    "2020-09-08T13:44:31.999999+08",
1167                ],
1168                Some("+08".into()),
1169                "minute",
1170                vec![
1171                    "2020-09-08T13:42:00.000000+08",
1172                    "2020-09-08T13:43:00.000000+08",
1173                    "2020-09-08T13:44:00.000000+08",
1174                ],
1175            ),
1176            (
1177                vec![
1178                    "2020-09-08T13:42:29.190855Z",
1179                    "2020-09-08T13:43:30.500000Z",
1180                    "2020-09-08T13:44:31.999999Z",
1181                ],
1182                Some("America/Sao_Paulo".into()),
1183                "minute",
1184                vec![
1185                    "2020-09-08T13:42:00.000000Z",
1186                    "2020-09-08T13:43:00.000000Z",
1187                    "2020-09-08T13:44:00.000000Z",
1188                ],
1189            ),
1190            // Test with None (no timezone)
1191            (
1192                vec![
1193                    "2020-09-08T13:42:29.190855Z",
1194                    "2020-09-08T13:43:30.500000Z",
1195                    "2020-09-08T13:44:31.999999Z",
1196                ],
1197                None,
1198                "minute",
1199                vec![
1200                    "2020-09-08T13:42:00.000000Z",
1201                    "2020-09-08T13:43:00.000000Z",
1202                    "2020-09-08T13:44:00.000000Z",
1203                ],
1204            ),
1205            // Test millisecond granularity
1206            (
1207                vec![
1208                    "2020-09-08T13:42:29.190855Z",
1209                    "2020-09-08T13:42:29.191999Z",
1210                    "2020-09-08T13:42:29.192500Z",
1211                ],
1212                Some("Asia/Kolkata".into()),
1213                "millisecond",
1214                vec![
1215                    "2020-09-08T19:12:29.190000+05:30",
1216                    "2020-09-08T19:12:29.191000+05:30",
1217                    "2020-09-08T19:12:29.192000+05:30",
1218                ],
1219            ),
1220        ];
1221
1222        cases
1223            .iter()
1224            .for_each(|(original, tz_opt, granularity, expected)| {
1225                let input = original
1226                    .iter()
1227                    .map(|s| Some(string_to_timestamp_nanos(s).unwrap()))
1228                    .collect::<TimestampNanosecondArray>()
1229                    .with_timezone_opt(tz_opt.clone());
1230                let right = expected
1231                    .iter()
1232                    .map(|s| Some(string_to_timestamp_nanos(s).unwrap()))
1233                    .collect::<TimestampNanosecondArray>()
1234                    .with_timezone_opt(tz_opt.clone());
1235                let batch_len = input.len();
1236                let arg_fields = vec![
1237                    Field::new("a", DataType::Utf8, false).into(),
1238                    Field::new("b", input.data_type().clone(), false).into(),
1239                ];
1240                let args = datafusion_expr::ScalarFunctionArgs {
1241                    args: vec![
1242                        ColumnarValue::Scalar(ScalarValue::from(*granularity)),
1243                        ColumnarValue::Array(Arc::new(input)),
1244                    ],
1245                    arg_fields,
1246                    number_rows: batch_len,
1247                    return_field: Field::new(
1248                        "f",
1249                        DataType::Timestamp(TimeUnit::Nanosecond, tz_opt.clone()),
1250                        true,
1251                    )
1252                    .into(),
1253                    config_options: Arc::new(ConfigOptions::default()),
1254                };
1255                let result = DateTruncFunc::new().invoke_with_args(args).unwrap();
1256                if let ColumnarValue::Array(result) = result {
1257                    assert_eq!(
1258                        result.data_type(),
1259                        &DataType::Timestamp(TimeUnit::Nanosecond, tz_opt.clone()),
1260                        "Failed for granularity: {granularity}, timezone: {tz_opt:?}"
1261                    );
1262                    let left = as_primitive_array::<TimestampNanosecondType>(&result);
1263                    assert_eq!(
1264                        left, &right,
1265                        "Failed for granularity: {granularity}, timezone: {tz_opt:?}"
1266                    );
1267                } else {
1268                    panic!("unexpected column type");
1269                }
1270            });
1271    }
1272}