datafusion_functions/datetime/
date_trunc.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use std::any::Any;
19use std::ops::{Add, Sub};
20use std::str::FromStr;
21use std::sync::Arc;
22
23use arrow::array::temporal_conversions::{
24    as_datetime_with_timezone, timestamp_ns_to_datetime,
25};
26use arrow::array::timezone::Tz;
27use arrow::array::types::{
28    ArrowTimestampType, TimestampMicrosecondType, TimestampMillisecondType,
29    TimestampNanosecondType, TimestampSecondType,
30};
31use arrow::array::{Array, ArrayRef, Int64Array, PrimitiveArray};
32use arrow::datatypes::DataType::{self, Null, Timestamp, Utf8, Utf8View};
33use arrow::datatypes::TimeUnit::{self, Microsecond, Millisecond, Nanosecond, Second};
34use datafusion_common::cast::as_primitive_array;
35use datafusion_common::{exec_err, plan_err, DataFusionError, Result, ScalarValue};
36use datafusion_expr::sort_properties::{ExprProperties, SortProperties};
37use datafusion_expr::TypeSignature::Exact;
38use datafusion_expr::{
39    ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, TIMEZONE_WILDCARD,
40};
41use datafusion_macros::user_doc;
42
43use chrono::{
44    DateTime, Datelike, Duration, LocalResult, NaiveDateTime, Offset, TimeDelta, Timelike,
45};
46
47#[user_doc(
48    doc_section(label = "Time and Date Functions"),
49    description = "Truncates a timestamp value to a specified precision.",
50    syntax_example = "date_trunc(precision, expression)",
51    argument(
52        name = "precision",
53        description = r#"Time precision to truncate to. The following precisions are supported:
54
55    - year / YEAR
56    - quarter / QUARTER
57    - month / MONTH
58    - week / WEEK
59    - day / DAY
60    - hour / HOUR
61    - minute / MINUTE
62    - second / SECOND
63    - millisecond / MILLISECOND
64    - microsecond / MICROSECOND
65"#
66    ),
67    argument(
68        name = "expression",
69        description = "Time expression to operate on. Can be a constant, column, or function."
70    )
71)]
72#[derive(Debug, PartialEq, Eq, Hash)]
73pub struct DateTruncFunc {
74    signature: Signature,
75    aliases: Vec<String>,
76}
77
78impl Default for DateTruncFunc {
79    fn default() -> Self {
80        Self::new()
81    }
82}
83
84impl DateTruncFunc {
85    pub fn new() -> Self {
86        Self {
87            signature: Signature::one_of(
88                vec![
89                    Exact(vec![Utf8, Timestamp(Nanosecond, None)]),
90                    Exact(vec![Utf8View, Timestamp(Nanosecond, None)]),
91                    Exact(vec![
92                        Utf8,
93                        Timestamp(Nanosecond, Some(TIMEZONE_WILDCARD.into())),
94                    ]),
95                    Exact(vec![
96                        Utf8View,
97                        Timestamp(Nanosecond, Some(TIMEZONE_WILDCARD.into())),
98                    ]),
99                    Exact(vec![Utf8, Timestamp(Microsecond, None)]),
100                    Exact(vec![Utf8View, Timestamp(Microsecond, None)]),
101                    Exact(vec![
102                        Utf8,
103                        Timestamp(Microsecond, Some(TIMEZONE_WILDCARD.into())),
104                    ]),
105                    Exact(vec![
106                        Utf8View,
107                        Timestamp(Microsecond, Some(TIMEZONE_WILDCARD.into())),
108                    ]),
109                    Exact(vec![Utf8, Timestamp(Millisecond, None)]),
110                    Exact(vec![Utf8View, Timestamp(Millisecond, None)]),
111                    Exact(vec![
112                        Utf8,
113                        Timestamp(Millisecond, Some(TIMEZONE_WILDCARD.into())),
114                    ]),
115                    Exact(vec![
116                        Utf8View,
117                        Timestamp(Millisecond, Some(TIMEZONE_WILDCARD.into())),
118                    ]),
119                    Exact(vec![Utf8, Timestamp(Second, None)]),
120                    Exact(vec![Utf8View, Timestamp(Second, None)]),
121                    Exact(vec![
122                        Utf8,
123                        Timestamp(Second, Some(TIMEZONE_WILDCARD.into())),
124                    ]),
125                    Exact(vec![
126                        Utf8View,
127                        Timestamp(Second, Some(TIMEZONE_WILDCARD.into())),
128                    ]),
129                ],
130                Volatility::Immutable,
131            ),
132            aliases: vec![String::from("datetrunc")],
133        }
134    }
135}
136
137impl ScalarUDFImpl for DateTruncFunc {
138    fn as_any(&self) -> &dyn Any {
139        self
140    }
141
142    fn name(&self) -> &str {
143        "date_trunc"
144    }
145
146    fn signature(&self) -> &Signature {
147        &self.signature
148    }
149
150    fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
151        match &arg_types[1] {
152            Timestamp(Nanosecond, None) | Utf8 | DataType::Date32 | Null => {
153                Ok(Timestamp(Nanosecond, None))
154            }
155            Timestamp(Nanosecond, tz_opt) => Ok(Timestamp(Nanosecond, tz_opt.clone())),
156            Timestamp(Microsecond, tz_opt) => Ok(Timestamp(Microsecond, tz_opt.clone())),
157            Timestamp(Millisecond, tz_opt) => Ok(Timestamp(Millisecond, tz_opt.clone())),
158            Timestamp(Second, tz_opt) => Ok(Timestamp(Second, tz_opt.clone())),
159            _ => plan_err!(
160                "The date_trunc function can only accept timestamp as the second arg."
161            ),
162        }
163    }
164
165    fn invoke_with_args(
166        &self,
167        args: datafusion_expr::ScalarFunctionArgs,
168    ) -> Result<ColumnarValue> {
169        let args = args.args;
170        let (granularity, array) = (&args[0], &args[1]);
171
172        let granularity = if let ColumnarValue::Scalar(ScalarValue::Utf8(Some(v))) =
173            granularity
174        {
175            v.to_lowercase()
176        } else if let ColumnarValue::Scalar(ScalarValue::Utf8View(Some(v))) = granularity
177        {
178            v.to_lowercase()
179        } else {
180            return exec_err!("Granularity of `date_trunc` must be non-null scalar Utf8");
181        };
182
183        fn process_array<T: ArrowTimestampType>(
184            array: &dyn Array,
185            granularity: String,
186            tz_opt: &Option<Arc<str>>,
187        ) -> Result<ColumnarValue> {
188            let parsed_tz = parse_tz(tz_opt)?;
189            let array = as_primitive_array::<T>(array)?;
190
191            // fast path for fine granularities
192            if matches!(
193                granularity.as_str(),
194                // For modern timezones, it's correct to truncate "minute" in this way.
195                // Both datafusion and arrow are ignoring historical timezone's non-minute granularity
196                // bias (e.g., Asia/Kathmandu before 1919 is UTC+05:41:16).
197                "second" | "minute" | "millisecond" | "microsecond"
198            ) ||
199            // In UTC, "hour" and "day" have uniform durations and can be truncated with simple arithmetic
200            (parsed_tz.is_none() && matches!(granularity.as_str(), "hour" | "day"))
201            {
202                let result = general_date_trunc_array_fine_granularity(
203                    T::UNIT,
204                    array,
205                    granularity.as_str(),
206                )?;
207                return Ok(ColumnarValue::Array(result));
208            }
209
210            let array: PrimitiveArray<T> = array
211                .try_unary(|x| {
212                    general_date_trunc(T::UNIT, x, parsed_tz, granularity.as_str())
213                })?
214                .with_timezone_opt(tz_opt.clone());
215            Ok(ColumnarValue::Array(Arc::new(array)))
216        }
217
218        fn process_scalar<T: ArrowTimestampType>(
219            v: &Option<i64>,
220            granularity: String,
221            tz_opt: &Option<Arc<str>>,
222        ) -> Result<ColumnarValue> {
223            let parsed_tz = parse_tz(tz_opt)?;
224            let value = if let Some(v) = v {
225                Some(general_date_trunc(
226                    T::UNIT,
227                    *v,
228                    parsed_tz,
229                    granularity.as_str(),
230                )?)
231            } else {
232                None
233            };
234            let value = ScalarValue::new_timestamp::<T>(value, tz_opt.clone());
235            Ok(ColumnarValue::Scalar(value))
236        }
237
238        Ok(match array {
239            ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(v, tz_opt)) => {
240                process_scalar::<TimestampNanosecondType>(v, granularity, tz_opt)?
241            }
242            ColumnarValue::Scalar(ScalarValue::TimestampMicrosecond(v, tz_opt)) => {
243                process_scalar::<TimestampMicrosecondType>(v, granularity, tz_opt)?
244            }
245            ColumnarValue::Scalar(ScalarValue::TimestampMillisecond(v, tz_opt)) => {
246                process_scalar::<TimestampMillisecondType>(v, granularity, tz_opt)?
247            }
248            ColumnarValue::Scalar(ScalarValue::TimestampSecond(v, tz_opt)) => {
249                process_scalar::<TimestampSecondType>(v, granularity, tz_opt)?
250            }
251            ColumnarValue::Array(array) => {
252                let array_type = array.data_type();
253                if let Timestamp(unit, tz_opt) = array_type {
254                    match unit {
255                        Second => process_array::<TimestampSecondType>(
256                            array,
257                            granularity,
258                            tz_opt,
259                        )?,
260                        Millisecond => process_array::<TimestampMillisecondType>(
261                            array,
262                            granularity,
263                            tz_opt,
264                        )?,
265                        Microsecond => process_array::<TimestampMicrosecondType>(
266                            array,
267                            granularity,
268                            tz_opt,
269                        )?,
270                        Nanosecond => process_array::<TimestampNanosecondType>(
271                            array,
272                            granularity,
273                            tz_opt,
274                        )?,
275                    }
276                } else {
277                    return exec_err!("second argument of `date_trunc` is an unsupported array type: {array_type}");
278                }
279            }
280            _ => {
281                return exec_err!(
282                    "second argument of `date_trunc` must be timestamp scalar or array"
283                );
284            }
285        })
286    }
287
288    fn aliases(&self) -> &[String] {
289        &self.aliases
290    }
291
292    fn output_ordering(&self, input: &[ExprProperties]) -> Result<SortProperties> {
293        // The DATE_TRUNC function preserves the order of its second argument.
294        let precision = &input[0];
295        let date_value = &input[1];
296
297        if precision.sort_properties.eq(&SortProperties::Singleton) {
298            Ok(date_value.sort_properties)
299        } else {
300            Ok(SortProperties::Unordered)
301        }
302    }
303    fn documentation(&self) -> Option<&Documentation> {
304        self.doc()
305    }
306}
307
308fn _date_trunc_coarse<T>(granularity: &str, value: Option<T>) -> Result<Option<T>>
309where
310    T: Datelike + Timelike + Sub<Duration, Output = T> + Copy,
311{
312    let value = match granularity {
313        "millisecond" => value,
314        "microsecond" => value,
315        "second" => value.and_then(|d| d.with_nanosecond(0)),
316        "minute" => value
317            .and_then(|d| d.with_nanosecond(0))
318            .and_then(|d| d.with_second(0)),
319        "hour" => value
320            .and_then(|d| d.with_nanosecond(0))
321            .and_then(|d| d.with_second(0))
322            .and_then(|d| d.with_minute(0)),
323        "day" => value
324            .and_then(|d| d.with_nanosecond(0))
325            .and_then(|d| d.with_second(0))
326            .and_then(|d| d.with_minute(0))
327            .and_then(|d| d.with_hour(0)),
328        "week" => value
329            .and_then(|d| d.with_nanosecond(0))
330            .and_then(|d| d.with_second(0))
331            .and_then(|d| d.with_minute(0))
332            .and_then(|d| d.with_hour(0))
333            .map(|d| {
334                d - TimeDelta::try_seconds(60 * 60 * 24 * d.weekday() as i64).unwrap()
335            }),
336        "month" => value
337            .and_then(|d| d.with_nanosecond(0))
338            .and_then(|d| d.with_second(0))
339            .and_then(|d| d.with_minute(0))
340            .and_then(|d| d.with_hour(0))
341            .and_then(|d| d.with_day0(0)),
342        "quarter" => value
343            .and_then(|d| d.with_nanosecond(0))
344            .and_then(|d| d.with_second(0))
345            .and_then(|d| d.with_minute(0))
346            .and_then(|d| d.with_hour(0))
347            .and_then(|d| d.with_day0(0))
348            .and_then(|d| d.with_month(quarter_month(&d))),
349        "year" => value
350            .and_then(|d| d.with_nanosecond(0))
351            .and_then(|d| d.with_second(0))
352            .and_then(|d| d.with_minute(0))
353            .and_then(|d| d.with_hour(0))
354            .and_then(|d| d.with_day0(0))
355            .and_then(|d| d.with_month0(0)),
356        unsupported => {
357            return exec_err!("Unsupported date_trunc granularity: {unsupported}");
358        }
359    };
360    Ok(value)
361}
362
363fn quarter_month<T>(date: &T) -> u32
364where
365    T: Datelike,
366{
367    1 + 3 * ((date.month() - 1) / 3)
368}
369
370fn _date_trunc_coarse_with_tz(
371    granularity: &str,
372    value: Option<DateTime<Tz>>,
373) -> Result<Option<i64>> {
374    if let Some(value) = value {
375        let local = value.naive_local();
376        let truncated = _date_trunc_coarse::<NaiveDateTime>(granularity, Some(local))?;
377        let truncated = truncated.and_then(|truncated| {
378            match truncated.and_local_timezone(value.timezone()) {
379                LocalResult::None => {
380                    // This can happen if the date_trunc operation moves the time into
381                    // an hour that doesn't exist due to daylight savings. On known example where
382                    // this can happen is with historic dates in the America/Sao_Paulo time zone.
383                    // To account for this adjust the time by a few hours, convert to local time,
384                    // and then adjust the time back.
385                    truncated
386                        .sub(TimeDelta::try_hours(3).unwrap())
387                        .and_local_timezone(value.timezone())
388                        .single()
389                        .map(|v| v.add(TimeDelta::try_hours(3).unwrap()))
390                }
391                LocalResult::Single(datetime) => Some(datetime),
392                LocalResult::Ambiguous(datetime1, datetime2) => {
393                    // Because we are truncating from an equally or more specific time
394                    // the original time must have been within the ambiguous local time
395                    // period. Therefore the offset of one of these times should match the
396                    // offset of the original time.
397                    if datetime1.offset().fix() == value.offset().fix() {
398                        Some(datetime1)
399                    } else {
400                        Some(datetime2)
401                    }
402                }
403            }
404        });
405        Ok(truncated.and_then(|value| value.timestamp_nanos_opt()))
406    } else {
407        _date_trunc_coarse::<NaiveDateTime>(granularity, None)?;
408        Ok(None)
409    }
410}
411
412fn _date_trunc_coarse_without_tz(
413    granularity: &str,
414    value: Option<NaiveDateTime>,
415) -> Result<Option<i64>> {
416    let value = _date_trunc_coarse::<NaiveDateTime>(granularity, value)?;
417    Ok(value.and_then(|value| value.and_utc().timestamp_nanos_opt()))
418}
419
420/// Truncates the single `value`, expressed in nanoseconds since the
421/// epoch, for granularities greater than 1 second, in taking into
422/// account that some granularities are not uniform durations of time
423/// (e.g. months are not always the same lengths, leap seconds, etc)
424fn date_trunc_coarse(granularity: &str, value: i64, tz: Option<Tz>) -> Result<i64> {
425    let value = match tz {
426        Some(tz) => {
427            // Use chrono DateTime<Tz> to clear the various fields because need to clear per timezone,
428            // and NaiveDateTime (ISO 8601) has no concept of timezones
429            let value = as_datetime_with_timezone::<TimestampNanosecondType>(value, tz)
430                .ok_or(DataFusionError::Execution(format!(
431                "Timestamp {value} out of range"
432            )))?;
433            _date_trunc_coarse_with_tz(granularity, Some(value))
434        }
435        None => {
436            // Use chrono NaiveDateTime to clear the various fields, if we don't have a timezone.
437            let value = timestamp_ns_to_datetime(value).ok_or_else(|| {
438                DataFusionError::Execution(format!("Timestamp {value} out of range"))
439            })?;
440            _date_trunc_coarse_without_tz(granularity, Some(value))
441        }
442    }?;
443
444    // `with_x(0)` are infallible because `0` are always a valid
445    Ok(value.unwrap())
446}
447
448/// Fast path for fine granularities (hour and smaller) that can be handled
449/// with simple arithmetic operations without calendar complexity.
450///
451/// This function is timezone-agnostic and should only be used when:
452/// - No timezone is specified in the input, OR
453/// - The granularity is less than hour as hour can be affected by DST transitions in some cases
454fn general_date_trunc_array_fine_granularity<T: ArrowTimestampType>(
455    tu: TimeUnit,
456    array: &PrimitiveArray<T>,
457    granularity: &str,
458) -> Result<ArrayRef> {
459    let unit = match (tu, granularity) {
460        (Second, "minute") => Some(Int64Array::new_scalar(60)),
461        (Second, "hour") => Some(Int64Array::new_scalar(3600)),
462        (Second, "day") => Some(Int64Array::new_scalar(86400)),
463
464        (Millisecond, "second") => Some(Int64Array::new_scalar(1_000)),
465        (Millisecond, "minute") => Some(Int64Array::new_scalar(60_000)),
466        (Millisecond, "hour") => Some(Int64Array::new_scalar(3_600_000)),
467        (Millisecond, "day") => Some(Int64Array::new_scalar(86_400_000)),
468
469        (Microsecond, "millisecond") => Some(Int64Array::new_scalar(1_000)),
470        (Microsecond, "second") => Some(Int64Array::new_scalar(1_000_000)),
471        (Microsecond, "minute") => Some(Int64Array::new_scalar(60_000_000)),
472        (Microsecond, "hour") => Some(Int64Array::new_scalar(3_600_000_000)),
473        (Microsecond, "day") => Some(Int64Array::new_scalar(86_400_000_000)),
474
475        (Nanosecond, "microsecond") => Some(Int64Array::new_scalar(1_000)),
476        (Nanosecond, "millisecond") => Some(Int64Array::new_scalar(1_000_000)),
477        (Nanosecond, "second") => Some(Int64Array::new_scalar(1_000_000_000)),
478        (Nanosecond, "minute") => Some(Int64Array::new_scalar(60_000_000_000)),
479        (Nanosecond, "hour") => Some(Int64Array::new_scalar(3_600_000_000_000)),
480        (Nanosecond, "day") => Some(Int64Array::new_scalar(86_400_000_000_000)),
481        _ => None,
482    };
483
484    if let Some(unit) = unit {
485        let original_type = array.data_type();
486        let array = arrow::compute::cast(array, &DataType::Int64)?;
487        let array = arrow::compute::kernels::numeric::div(&array, &unit)?;
488        let array = arrow::compute::kernels::numeric::mul(&array, &unit)?;
489        let array = arrow::compute::cast(&array, original_type)?;
490        Ok(array)
491    } else {
492        // truncate to the same or smaller unit
493        Ok(Arc::new(array.clone()))
494    }
495}
496
497// truncates a single value with the given timeunit to the specified granularity
498fn general_date_trunc(
499    tu: TimeUnit,
500    value: i64,
501    tz: Option<Tz>,
502    granularity: &str,
503) -> Result<i64, DataFusionError> {
504    let scale = match tu {
505        Second => 1_000_000_000,
506        Millisecond => 1_000_000,
507        Microsecond => 1_000,
508        Nanosecond => 1,
509    };
510
511    // convert to nanoseconds
512    let nano = date_trunc_coarse(granularity, scale * value, tz)?;
513
514    let result = match tu {
515        Second => match granularity {
516            "minute" => nano / 1_000_000_000 / 60 * 60,
517            _ => nano / 1_000_000_000,
518        },
519        Millisecond => match granularity {
520            "minute" => nano / 1_000_000 / 1_000 / 60 * 1_000 * 60,
521            "second" => nano / 1_000_000 / 1_000 * 1_000,
522            _ => nano / 1_000_000,
523        },
524        Microsecond => match granularity {
525            "minute" => nano / 1_000 / 1_000_000 / 60 * 60 * 1_000_000,
526            "second" => nano / 1_000 / 1_000_000 * 1_000_000,
527            "millisecond" => nano / 1_000 / 1_000 * 1_000,
528            _ => nano / 1_000,
529        },
530        _ => match granularity {
531            "minute" => nano / 1_000_000_000 / 60 * 1_000_000_000 * 60,
532            "second" => nano / 1_000_000_000 * 1_000_000_000,
533            "millisecond" => nano / 1_000_000 * 1_000_000,
534            "microsecond" => nano / 1_000 * 1_000,
535            _ => nano,
536        },
537    };
538    Ok(result)
539}
540
541fn parse_tz(tz: &Option<Arc<str>>) -> Result<Option<Tz>> {
542    tz.as_ref()
543        .map(|tz| {
544            Tz::from_str(tz).map_err(|op| {
545                DataFusionError::Execution(format!("failed on timezone {tz}: {op:?}"))
546            })
547        })
548        .transpose()
549}
550
551#[cfg(test)]
552mod tests {
553    use std::sync::Arc;
554
555    use crate::datetime::date_trunc::{date_trunc_coarse, DateTruncFunc};
556
557    use arrow::array::cast::as_primitive_array;
558    use arrow::array::types::TimestampNanosecondType;
559    use arrow::array::{Array, TimestampNanosecondArray};
560    use arrow::compute::kernels::cast_utils::string_to_timestamp_nanos;
561    use arrow::datatypes::{DataType, Field, TimeUnit};
562    use datafusion_common::config::ConfigOptions;
563    use datafusion_common::ScalarValue;
564    use datafusion_expr::{ColumnarValue, ScalarUDFImpl};
565
566    #[test]
567    fn date_trunc_test() {
568        let cases = vec![
569            (
570                "2020-09-08T13:42:29.190855Z",
571                "second",
572                "2020-09-08T13:42:29.000000Z",
573            ),
574            (
575                "2020-09-08T13:42:29.190855Z",
576                "minute",
577                "2020-09-08T13:42:00.000000Z",
578            ),
579            (
580                "2020-09-08T13:42:29.190855Z",
581                "hour",
582                "2020-09-08T13:00:00.000000Z",
583            ),
584            (
585                "2020-09-08T13:42:29.190855Z",
586                "day",
587                "2020-09-08T00:00:00.000000Z",
588            ),
589            (
590                "2020-09-08T13:42:29.190855Z",
591                "week",
592                "2020-09-07T00:00:00.000000Z",
593            ),
594            (
595                "2020-09-08T13:42:29.190855Z",
596                "month",
597                "2020-09-01T00:00:00.000000Z",
598            ),
599            (
600                "2020-09-08T13:42:29.190855Z",
601                "year",
602                "2020-01-01T00:00:00.000000Z",
603            ),
604            // week
605            (
606                "2021-01-01T13:42:29.190855Z",
607                "week",
608                "2020-12-28T00:00:00.000000Z",
609            ),
610            (
611                "2020-01-01T13:42:29.190855Z",
612                "week",
613                "2019-12-30T00:00:00.000000Z",
614            ),
615            // quarter
616            (
617                "2020-01-01T13:42:29.190855Z",
618                "quarter",
619                "2020-01-01T00:00:00.000000Z",
620            ),
621            (
622                "2020-02-01T13:42:29.190855Z",
623                "quarter",
624                "2020-01-01T00:00:00.000000Z",
625            ),
626            (
627                "2020-03-01T13:42:29.190855Z",
628                "quarter",
629                "2020-01-01T00:00:00.000000Z",
630            ),
631            (
632                "2020-04-01T13:42:29.190855Z",
633                "quarter",
634                "2020-04-01T00:00:00.000000Z",
635            ),
636            (
637                "2020-08-01T13:42:29.190855Z",
638                "quarter",
639                "2020-07-01T00:00:00.000000Z",
640            ),
641            (
642                "2020-11-01T13:42:29.190855Z",
643                "quarter",
644                "2020-10-01T00:00:00.000000Z",
645            ),
646            (
647                "2020-12-01T13:42:29.190855Z",
648                "quarter",
649                "2020-10-01T00:00:00.000000Z",
650            ),
651        ];
652
653        cases.iter().for_each(|(original, granularity, expected)| {
654            let left = string_to_timestamp_nanos(original).unwrap();
655            let right = string_to_timestamp_nanos(expected).unwrap();
656            let result = date_trunc_coarse(granularity, left, None).unwrap();
657            assert_eq!(result, right, "{original} = {expected}");
658        });
659    }
660
661    #[test]
662    fn test_date_trunc_timezones() {
663        let cases = vec![
664            (
665                vec![
666                    "2020-09-08T00:00:00Z",
667                    "2020-09-08T01:00:00Z",
668                    "2020-09-08T02:00:00Z",
669                    "2020-09-08T03:00:00Z",
670                    "2020-09-08T04:00:00Z",
671                ],
672                Some("+00".into()),
673                vec![
674                    "2020-09-08T00:00:00Z",
675                    "2020-09-08T00:00:00Z",
676                    "2020-09-08T00:00:00Z",
677                    "2020-09-08T00:00:00Z",
678                    "2020-09-08T00:00:00Z",
679                ],
680            ),
681            (
682                vec![
683                    "2020-09-08T00:00:00Z",
684                    "2020-09-08T01:00:00Z",
685                    "2020-09-08T02:00:00Z",
686                    "2020-09-08T03:00:00Z",
687                    "2020-09-08T04:00:00Z",
688                ],
689                None,
690                vec![
691                    "2020-09-08T00:00:00Z",
692                    "2020-09-08T00:00:00Z",
693                    "2020-09-08T00:00:00Z",
694                    "2020-09-08T00:00:00Z",
695                    "2020-09-08T00:00:00Z",
696                ],
697            ),
698            (
699                vec![
700                    "2020-09-08T00:00:00Z",
701                    "2020-09-08T01:00:00Z",
702                    "2020-09-08T02:00:00Z",
703                    "2020-09-08T03:00:00Z",
704                    "2020-09-08T04:00:00Z",
705                ],
706                Some("-02".into()),
707                vec![
708                    "2020-09-07T02:00:00Z",
709                    "2020-09-07T02:00:00Z",
710                    "2020-09-08T02:00:00Z",
711                    "2020-09-08T02:00:00Z",
712                    "2020-09-08T02:00:00Z",
713                ],
714            ),
715            (
716                vec![
717                    "2020-09-08T00:00:00+05",
718                    "2020-09-08T01:00:00+05",
719                    "2020-09-08T02:00:00+05",
720                    "2020-09-08T03:00:00+05",
721                    "2020-09-08T04:00:00+05",
722                ],
723                Some("+05".into()),
724                vec![
725                    "2020-09-08T00:00:00+05",
726                    "2020-09-08T00:00:00+05",
727                    "2020-09-08T00:00:00+05",
728                    "2020-09-08T00:00:00+05",
729                    "2020-09-08T00:00:00+05",
730                ],
731            ),
732            (
733                vec![
734                    "2020-09-08T00:00:00+08",
735                    "2020-09-08T01:00:00+08",
736                    "2020-09-08T02:00:00+08",
737                    "2020-09-08T03:00:00+08",
738                    "2020-09-08T04:00:00+08",
739                ],
740                Some("+08".into()),
741                vec![
742                    "2020-09-08T00:00:00+08",
743                    "2020-09-08T00:00:00+08",
744                    "2020-09-08T00:00:00+08",
745                    "2020-09-08T00:00:00+08",
746                    "2020-09-08T00:00:00+08",
747                ],
748            ),
749            (
750                vec![
751                    "2024-10-26T23:00:00Z",
752                    "2024-10-27T00:00:00Z",
753                    "2024-10-27T01:00:00Z",
754                    "2024-10-27T02:00:00Z",
755                ],
756                Some("Europe/Berlin".into()),
757                vec![
758                    "2024-10-27T00:00:00+02",
759                    "2024-10-27T00:00:00+02",
760                    "2024-10-27T00:00:00+02",
761                    "2024-10-27T00:00:00+02",
762                ],
763            ),
764            (
765                vec![
766                    "2018-02-18T00:00:00Z",
767                    "2018-02-18T01:00:00Z",
768                    "2018-02-18T02:00:00Z",
769                    "2018-02-18T03:00:00Z",
770                    "2018-11-04T01:00:00Z",
771                    "2018-11-04T02:00:00Z",
772                    "2018-11-04T03:00:00Z",
773                    "2018-11-04T04:00:00Z",
774                ],
775                Some("America/Sao_Paulo".into()),
776                vec![
777                    "2018-02-17T00:00:00-02",
778                    "2018-02-17T00:00:00-02",
779                    "2018-02-17T00:00:00-02",
780                    "2018-02-18T00:00:00-03",
781                    "2018-11-03T00:00:00-03",
782                    "2018-11-03T00:00:00-03",
783                    "2018-11-04T01:00:00-02",
784                    "2018-11-04T01:00:00-02",
785                ],
786            ),
787        ];
788
789        cases.iter().for_each(|(original, tz_opt, expected)| {
790            let input = original
791                .iter()
792                .map(|s| Some(string_to_timestamp_nanos(s).unwrap()))
793                .collect::<TimestampNanosecondArray>()
794                .with_timezone_opt(tz_opt.clone());
795            let right = expected
796                .iter()
797                .map(|s| Some(string_to_timestamp_nanos(s).unwrap()))
798                .collect::<TimestampNanosecondArray>()
799                .with_timezone_opt(tz_opt.clone());
800            let batch_len = input.len();
801            let arg_fields = vec![
802                Field::new("a", DataType::Utf8, false).into(),
803                Field::new("b", input.data_type().clone(), false).into(),
804            ];
805            let args = datafusion_expr::ScalarFunctionArgs {
806                args: vec![
807                    ColumnarValue::Scalar(ScalarValue::from("day")),
808                    ColumnarValue::Array(Arc::new(input)),
809                ],
810                arg_fields,
811                number_rows: batch_len,
812                return_field: Field::new(
813                    "f",
814                    DataType::Timestamp(TimeUnit::Nanosecond, tz_opt.clone()),
815                    true,
816                )
817                .into(),
818                config_options: Arc::new(ConfigOptions::default()),
819            };
820            let result = DateTruncFunc::new().invoke_with_args(args).unwrap();
821            if let ColumnarValue::Array(result) = result {
822                assert_eq!(
823                    result.data_type(),
824                    &DataType::Timestamp(TimeUnit::Nanosecond, tz_opt.clone())
825                );
826                let left = as_primitive_array::<TimestampNanosecondType>(&result);
827                assert_eq!(left, &right);
828            } else {
829                panic!("unexpected column type");
830            }
831        });
832    }
833
834    #[test]
835    fn test_date_trunc_hour_timezones() {
836        let cases = vec![
837            (
838                vec![
839                    "2020-09-08T00:30:00Z",
840                    "2020-09-08T01:30:00Z",
841                    "2020-09-08T02:30:00Z",
842                    "2020-09-08T03:30:00Z",
843                    "2020-09-08T04:30:00Z",
844                ],
845                Some("+00".into()),
846                vec![
847                    "2020-09-08T00:00:00Z",
848                    "2020-09-08T01:00:00Z",
849                    "2020-09-08T02:00:00Z",
850                    "2020-09-08T03:00:00Z",
851                    "2020-09-08T04:00:00Z",
852                ],
853            ),
854            (
855                vec![
856                    "2020-09-08T00:30:00Z",
857                    "2020-09-08T01:30:00Z",
858                    "2020-09-08T02:30:00Z",
859                    "2020-09-08T03:30:00Z",
860                    "2020-09-08T04:30:00Z",
861                ],
862                None,
863                vec![
864                    "2020-09-08T00:00:00Z",
865                    "2020-09-08T01:00:00Z",
866                    "2020-09-08T02:00:00Z",
867                    "2020-09-08T03:00:00Z",
868                    "2020-09-08T04:00:00Z",
869                ],
870            ),
871            (
872                vec![
873                    "2020-09-08T00:30:00Z",
874                    "2020-09-08T01:30:00Z",
875                    "2020-09-08T02:30:00Z",
876                    "2020-09-08T03:30:00Z",
877                    "2020-09-08T04:30:00Z",
878                ],
879                Some("-02".into()),
880                vec![
881                    "2020-09-08T00:00:00Z",
882                    "2020-09-08T01:00:00Z",
883                    "2020-09-08T02:00:00Z",
884                    "2020-09-08T03:00:00Z",
885                    "2020-09-08T04:00:00Z",
886                ],
887            ),
888            (
889                vec![
890                    "2020-09-08T00:30:00+05",
891                    "2020-09-08T01:30:00+05",
892                    "2020-09-08T02:30:00+05",
893                    "2020-09-08T03:30:00+05",
894                    "2020-09-08T04:30:00+05",
895                ],
896                Some("+05".into()),
897                vec![
898                    "2020-09-08T00:00:00+05",
899                    "2020-09-08T01:00:00+05",
900                    "2020-09-08T02:00:00+05",
901                    "2020-09-08T03:00:00+05",
902                    "2020-09-08T04:00:00+05",
903                ],
904            ),
905            (
906                vec![
907                    "2020-09-08T00:30:00+08",
908                    "2020-09-08T01:30:00+08",
909                    "2020-09-08T02:30:00+08",
910                    "2020-09-08T03:30:00+08",
911                    "2020-09-08T04:30:00+08",
912                ],
913                Some("+08".into()),
914                vec![
915                    "2020-09-08T00:00:00+08",
916                    "2020-09-08T01:00:00+08",
917                    "2020-09-08T02:00:00+08",
918                    "2020-09-08T03:00:00+08",
919                    "2020-09-08T04:00:00+08",
920                ],
921            ),
922            (
923                vec![
924                    "2024-10-26T23:30:00Z",
925                    "2024-10-27T00:30:00Z",
926                    "2024-10-27T01:30:00Z",
927                    "2024-10-27T02:30:00Z",
928                ],
929                Some("Europe/Berlin".into()),
930                vec![
931                    "2024-10-27T01:00:00+02",
932                    "2024-10-27T02:00:00+02",
933                    "2024-10-27T02:00:00+01",
934                    "2024-10-27T03:00:00+01",
935                ],
936            ),
937            (
938                vec![
939                    "2018-02-18T00:30:00Z",
940                    "2018-02-18T01:30:00Z",
941                    "2018-02-18T02:30:00Z",
942                    "2018-02-18T03:30:00Z",
943                    "2018-11-04T01:00:00Z",
944                    "2018-11-04T02:00:00Z",
945                    "2018-11-04T03:00:00Z",
946                    "2018-11-04T04:00:00Z",
947                ],
948                Some("America/Sao_Paulo".into()),
949                vec![
950                    "2018-02-17T22:00:00-02",
951                    "2018-02-17T23:00:00-02",
952                    "2018-02-17T23:00:00-03",
953                    "2018-02-18T00:00:00-03",
954                    "2018-11-03T22:00:00-03",
955                    "2018-11-03T23:00:00-03",
956                    "2018-11-04T01:00:00-02",
957                    "2018-11-04T02:00:00-02",
958                ],
959            ),
960            (
961                vec![
962                    "2024-10-26T23:30:00Z",
963                    "2024-10-27T00:30:00Z",
964                    "2024-10-27T01:30:00Z",
965                    "2024-10-27T02:30:00Z",
966                ],
967                Some("Asia/Kathmandu".into()), // UTC+5:45
968                vec![
969                    "2024-10-27T05:00:00+05:45",
970                    "2024-10-27T06:00:00+05:45",
971                    "2024-10-27T07:00:00+05:45",
972                    "2024-10-27T08:00:00+05:45",
973                ],
974            ),
975        ];
976
977        cases.iter().for_each(|(original, tz_opt, expected)| {
978            let input = original
979                .iter()
980                .map(|s| Some(string_to_timestamp_nanos(s).unwrap()))
981                .collect::<TimestampNanosecondArray>()
982                .with_timezone_opt(tz_opt.clone());
983            let right = expected
984                .iter()
985                .map(|s| Some(string_to_timestamp_nanos(s).unwrap()))
986                .collect::<TimestampNanosecondArray>()
987                .with_timezone_opt(tz_opt.clone());
988            let batch_len = input.len();
989            let arg_fields = vec![
990                Field::new("a", DataType::Utf8, false).into(),
991                Field::new("b", input.data_type().clone(), false).into(),
992            ];
993            let args = datafusion_expr::ScalarFunctionArgs {
994                args: vec![
995                    ColumnarValue::Scalar(ScalarValue::from("hour")),
996                    ColumnarValue::Array(Arc::new(input)),
997                ],
998                arg_fields,
999                number_rows: batch_len,
1000                return_field: Field::new(
1001                    "f",
1002                    DataType::Timestamp(TimeUnit::Nanosecond, tz_opt.clone()),
1003                    true,
1004                )
1005                .into(),
1006                config_options: Arc::new(ConfigOptions::default()),
1007            };
1008            let result = DateTruncFunc::new().invoke_with_args(args).unwrap();
1009            if let ColumnarValue::Array(result) = result {
1010                assert_eq!(
1011                    result.data_type(),
1012                    &DataType::Timestamp(TimeUnit::Nanosecond, tz_opt.clone())
1013                );
1014                let left = as_primitive_array::<TimestampNanosecondType>(&result);
1015                assert_eq!(left, &right);
1016            } else {
1017                panic!("unexpected column type");
1018            }
1019        });
1020    }
1021}