1use std::any::Any;
19use std::num::NonZeroI64;
20use std::ops::{Add, Sub};
21use std::str::FromStr;
22use std::sync::Arc;
23
24use arrow::array::temporal_conversions::{
25 as_datetime_with_timezone, timestamp_ns_to_datetime,
26};
27use arrow::array::timezone::Tz;
28use arrow::array::types::{
29 ArrowTimestampType, TimestampMicrosecondType, TimestampMillisecondType,
30 TimestampNanosecondType, TimestampSecondType,
31};
32use arrow::array::{Array, ArrayRef, PrimitiveArray};
33use arrow::datatypes::DataType::{self, Null, Timestamp, Utf8, Utf8View};
34use arrow::datatypes::TimeUnit::{self, Microsecond, Millisecond, Nanosecond, Second};
35use datafusion_common::cast::as_primitive_array;
36use datafusion_common::{
37 DataFusionError, Result, ScalarValue, exec_datafusion_err, exec_err, plan_err,
38};
39use datafusion_expr::TypeSignature::Exact;
40use datafusion_expr::sort_properties::{ExprProperties, SortProperties};
41use datafusion_expr::{
42 ColumnarValue, Documentation, ScalarUDFImpl, Signature, TIMEZONE_WILDCARD, Volatility,
43};
44use datafusion_macros::user_doc;
45
46use chrono::{
47 DateTime, Datelike, Duration, LocalResult, NaiveDateTime, Offset, TimeDelta, Timelike,
48};
49
50#[derive(Debug, Clone, Copy, PartialEq, Eq)]
52enum DateTruncGranularity {
53 Microsecond,
54 Millisecond,
55 Second,
56 Minute,
57 Hour,
58 Day,
59 Week,
60 Month,
61 Quarter,
62 Year,
63}
64
65impl DateTruncGranularity {
66 const SUPPORTED_GRANULARITIES: &[&str] = &[
70 "microsecond",
71 "millisecond",
72 "second",
73 "minute",
74 "hour",
75 "day",
76 "week",
77 "month",
78 "quarter",
79 "year",
80 ];
81
82 fn from_str(s: &str) -> Result<Self> {
84 match s.to_lowercase().as_str() {
86 "microsecond" => Ok(Self::Microsecond),
87 "millisecond" => Ok(Self::Millisecond),
88 "second" => Ok(Self::Second),
89 "minute" => Ok(Self::Minute),
90 "hour" => Ok(Self::Hour),
91 "day" => Ok(Self::Day),
92 "week" => Ok(Self::Week),
93 "month" => Ok(Self::Month),
94 "quarter" => Ok(Self::Quarter),
95 "year" => Ok(Self::Year),
96 _ => {
97 let supported = Self::SUPPORTED_GRANULARITIES.join(", ");
98 exec_err!(
99 "Unsupported date_trunc granularity: '{s}'. Supported values are: {supported}"
100 )
101 }
102 }
103 }
104
105 fn is_fine_granularity(&self) -> bool {
108 matches!(
109 self,
110 Self::Second | Self::Minute | Self::Millisecond | Self::Microsecond
111 )
112 }
113
114 fn is_fine_granularity_utc(&self) -> bool {
117 self.is_fine_granularity() || matches!(self, Self::Hour | Self::Day)
118 }
119}
120
121#[user_doc(
122 doc_section(label = "Time and Date Functions"),
123 description = "Truncates a timestamp value to a specified precision.",
124 syntax_example = "date_trunc(precision, expression)",
125 argument(
126 name = "precision",
127 description = r#"Time precision to truncate to. The following precisions are supported:
128
129 - year / YEAR
130 - quarter / QUARTER
131 - month / MONTH
132 - week / WEEK
133 - day / DAY
134 - hour / HOUR
135 - minute / MINUTE
136 - second / SECOND
137 - millisecond / MILLISECOND
138 - microsecond / MICROSECOND
139"#
140 ),
141 argument(
142 name = "expression",
143 description = "Time expression to operate on. Can be a constant, column, or function."
144 )
145)]
146#[derive(Debug, PartialEq, Eq, Hash)]
147pub struct DateTruncFunc {
148 signature: Signature,
149 aliases: Vec<String>,
150}
151
152impl Default for DateTruncFunc {
153 fn default() -> Self {
154 Self::new()
155 }
156}
157
158impl DateTruncFunc {
159 pub fn new() -> Self {
160 Self {
161 signature: Signature::one_of(
162 vec![
163 Exact(vec![Utf8, Timestamp(Nanosecond, None)]),
164 Exact(vec![Utf8View, Timestamp(Nanosecond, None)]),
165 Exact(vec![
166 Utf8,
167 Timestamp(Nanosecond, Some(TIMEZONE_WILDCARD.into())),
168 ]),
169 Exact(vec![
170 Utf8View,
171 Timestamp(Nanosecond, Some(TIMEZONE_WILDCARD.into())),
172 ]),
173 Exact(vec![Utf8, Timestamp(Microsecond, None)]),
174 Exact(vec![Utf8View, Timestamp(Microsecond, None)]),
175 Exact(vec![
176 Utf8,
177 Timestamp(Microsecond, Some(TIMEZONE_WILDCARD.into())),
178 ]),
179 Exact(vec![
180 Utf8View,
181 Timestamp(Microsecond, Some(TIMEZONE_WILDCARD.into())),
182 ]),
183 Exact(vec![Utf8, Timestamp(Millisecond, None)]),
184 Exact(vec![Utf8View, Timestamp(Millisecond, None)]),
185 Exact(vec![
186 Utf8,
187 Timestamp(Millisecond, Some(TIMEZONE_WILDCARD.into())),
188 ]),
189 Exact(vec![
190 Utf8View,
191 Timestamp(Millisecond, Some(TIMEZONE_WILDCARD.into())),
192 ]),
193 Exact(vec![Utf8, Timestamp(Second, None)]),
194 Exact(vec![Utf8View, Timestamp(Second, None)]),
195 Exact(vec![
196 Utf8,
197 Timestamp(Second, Some(TIMEZONE_WILDCARD.into())),
198 ]),
199 Exact(vec![
200 Utf8View,
201 Timestamp(Second, Some(TIMEZONE_WILDCARD.into())),
202 ]),
203 ],
204 Volatility::Immutable,
205 ),
206 aliases: vec![String::from("datetrunc")],
207 }
208 }
209}
210
211impl ScalarUDFImpl for DateTruncFunc {
212 fn as_any(&self) -> &dyn Any {
213 self
214 }
215
216 fn name(&self) -> &str {
217 "date_trunc"
218 }
219
220 fn signature(&self) -> &Signature {
221 &self.signature
222 }
223
224 fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
225 match &arg_types[1] {
226 Timestamp(Nanosecond, None) | Utf8 | DataType::Date32 | Null => {
227 Ok(Timestamp(Nanosecond, None))
228 }
229 Timestamp(Nanosecond, tz_opt) => Ok(Timestamp(Nanosecond, tz_opt.clone())),
230 Timestamp(Microsecond, tz_opt) => Ok(Timestamp(Microsecond, tz_opt.clone())),
231 Timestamp(Millisecond, tz_opt) => Ok(Timestamp(Millisecond, tz_opt.clone())),
232 Timestamp(Second, tz_opt) => Ok(Timestamp(Second, tz_opt.clone())),
233 _ => plan_err!(
234 "The date_trunc function can only accept timestamp as the second arg."
235 ),
236 }
237 }
238
239 fn invoke_with_args(
240 &self,
241 args: datafusion_expr::ScalarFunctionArgs,
242 ) -> Result<ColumnarValue> {
243 let args = args.args;
244 let (granularity, array) = (&args[0], &args[1]);
245
246 let granularity_str = if let ColumnarValue::Scalar(ScalarValue::Utf8(Some(v))) =
247 granularity
248 {
249 v.to_lowercase()
250 } else if let ColumnarValue::Scalar(ScalarValue::Utf8View(Some(v))) = granularity
251 {
252 v.to_lowercase()
253 } else {
254 return exec_err!("Granularity of `date_trunc` must be non-null scalar Utf8");
255 };
256
257 let granularity = DateTruncGranularity::from_str(&granularity_str)?;
258
259 fn process_array<T: ArrowTimestampType>(
260 array: &dyn Array,
261 granularity: DateTruncGranularity,
262 tz_opt: &Option<Arc<str>>,
263 ) -> Result<ColumnarValue> {
264 let parsed_tz = parse_tz(tz_opt)?;
265 let array = as_primitive_array::<T>(array)?;
266
267 if granularity.is_fine_granularity()
273 || (parsed_tz.is_none() && granularity.is_fine_granularity_utc())
274 {
275 let result = general_date_trunc_array_fine_granularity(
276 T::UNIT,
277 array,
278 granularity,
279 tz_opt.clone(),
280 )?;
281 return Ok(ColumnarValue::Array(result));
282 }
283
284 let array: PrimitiveArray<T> = array
285 .try_unary(|x| general_date_trunc(T::UNIT, x, parsed_tz, granularity))?
286 .with_timezone_opt(tz_opt.clone());
287 Ok(ColumnarValue::Array(Arc::new(array)))
288 }
289
290 fn process_scalar<T: ArrowTimestampType>(
291 v: &Option<i64>,
292 granularity: DateTruncGranularity,
293 tz_opt: &Option<Arc<str>>,
294 ) -> Result<ColumnarValue> {
295 let parsed_tz = parse_tz(tz_opt)?;
296 let value = if let Some(v) = v {
297 Some(general_date_trunc(T::UNIT, *v, parsed_tz, granularity)?)
298 } else {
299 None
300 };
301 let value = ScalarValue::new_timestamp::<T>(value, tz_opt.clone());
302 Ok(ColumnarValue::Scalar(value))
303 }
304
305 Ok(match array {
306 ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(v, tz_opt)) => {
307 process_scalar::<TimestampNanosecondType>(v, granularity, tz_opt)?
308 }
309 ColumnarValue::Scalar(ScalarValue::TimestampMicrosecond(v, tz_opt)) => {
310 process_scalar::<TimestampMicrosecondType>(v, granularity, tz_opt)?
311 }
312 ColumnarValue::Scalar(ScalarValue::TimestampMillisecond(v, tz_opt)) => {
313 process_scalar::<TimestampMillisecondType>(v, granularity, tz_opt)?
314 }
315 ColumnarValue::Scalar(ScalarValue::TimestampSecond(v, tz_opt)) => {
316 process_scalar::<TimestampSecondType>(v, granularity, tz_opt)?
317 }
318 ColumnarValue::Array(array) => {
319 let array_type = array.data_type();
320 if let Timestamp(unit, tz_opt) = array_type {
321 match unit {
322 Second => process_array::<TimestampSecondType>(
323 array,
324 granularity,
325 tz_opt,
326 )?,
327 Millisecond => process_array::<TimestampMillisecondType>(
328 array,
329 granularity,
330 tz_opt,
331 )?,
332 Microsecond => process_array::<TimestampMicrosecondType>(
333 array,
334 granularity,
335 tz_opt,
336 )?,
337 Nanosecond => process_array::<TimestampNanosecondType>(
338 array,
339 granularity,
340 tz_opt,
341 )?,
342 }
343 } else {
344 return exec_err!(
345 "second argument of `date_trunc` is an unsupported array type: {array_type}"
346 );
347 }
348 }
349 _ => {
350 return exec_err!(
351 "second argument of `date_trunc` must be timestamp scalar or array"
352 );
353 }
354 })
355 }
356
357 fn aliases(&self) -> &[String] {
358 &self.aliases
359 }
360
361 fn output_ordering(&self, input: &[ExprProperties]) -> Result<SortProperties> {
362 let precision = &input[0];
364 let date_value = &input[1];
365
366 if precision.sort_properties.eq(&SortProperties::Singleton) {
367 Ok(date_value.sort_properties)
368 } else {
369 Ok(SortProperties::Unordered)
370 }
371 }
372 fn documentation(&self) -> Option<&Documentation> {
373 self.doc()
374 }
375}
376
377fn _date_trunc_coarse<T>(
378 granularity: DateTruncGranularity,
379 value: Option<T>,
380) -> Result<Option<T>>
381where
382 T: Datelike + Timelike + Sub<Duration, Output = T> + Copy,
383{
384 let value = match granularity {
385 DateTruncGranularity::Millisecond => value,
386 DateTruncGranularity::Microsecond => value,
387 DateTruncGranularity::Second => value.and_then(|d| d.with_nanosecond(0)),
388 DateTruncGranularity::Minute => value
389 .and_then(|d| d.with_nanosecond(0))
390 .and_then(|d| d.with_second(0)),
391 DateTruncGranularity::Hour => value
392 .and_then(|d| d.with_nanosecond(0))
393 .and_then(|d| d.with_second(0))
394 .and_then(|d| d.with_minute(0)),
395 DateTruncGranularity::Day => value
396 .and_then(|d| d.with_nanosecond(0))
397 .and_then(|d| d.with_second(0))
398 .and_then(|d| d.with_minute(0))
399 .and_then(|d| d.with_hour(0)),
400 DateTruncGranularity::Week => value
401 .and_then(|d| d.with_nanosecond(0))
402 .and_then(|d| d.with_second(0))
403 .and_then(|d| d.with_minute(0))
404 .and_then(|d| d.with_hour(0))
405 .map(|d| {
406 d - TimeDelta::try_seconds(60 * 60 * 24 * d.weekday() as i64).unwrap()
407 }),
408 DateTruncGranularity::Month => value
409 .and_then(|d| d.with_nanosecond(0))
410 .and_then(|d| d.with_second(0))
411 .and_then(|d| d.with_minute(0))
412 .and_then(|d| d.with_hour(0))
413 .and_then(|d| d.with_day0(0)),
414 DateTruncGranularity::Quarter => value
415 .and_then(|d| d.with_nanosecond(0))
416 .and_then(|d| d.with_second(0))
417 .and_then(|d| d.with_minute(0))
418 .and_then(|d| d.with_hour(0))
419 .and_then(|d| d.with_day0(0))
420 .and_then(|d| d.with_month(quarter_month(&d))),
421 DateTruncGranularity::Year => value
422 .and_then(|d| d.with_nanosecond(0))
423 .and_then(|d| d.with_second(0))
424 .and_then(|d| d.with_minute(0))
425 .and_then(|d| d.with_hour(0))
426 .and_then(|d| d.with_day0(0))
427 .and_then(|d| d.with_month0(0)),
428 };
429 Ok(value)
430}
431
432fn quarter_month<T>(date: &T) -> u32
433where
434 T: Datelike,
435{
436 1 + 3 * ((date.month() - 1) / 3)
437}
438
439fn _date_trunc_coarse_with_tz(
440 granularity: DateTruncGranularity,
441 value: Option<DateTime<Tz>>,
442) -> Result<Option<i64>> {
443 if let Some(value) = value {
444 let local = value.naive_local();
445 let truncated = _date_trunc_coarse::<NaiveDateTime>(granularity, Some(local))?;
446 let truncated = truncated.and_then(|truncated| {
447 match truncated.and_local_timezone(value.timezone()) {
448 LocalResult::None => {
449 truncated
455 .sub(TimeDelta::try_hours(3).unwrap())
456 .and_local_timezone(value.timezone())
457 .single()
458 .map(|v| v.add(TimeDelta::try_hours(3).unwrap()))
459 }
460 LocalResult::Single(datetime) => Some(datetime),
461 LocalResult::Ambiguous(datetime1, datetime2) => {
462 if datetime1.offset().fix() == value.offset().fix() {
467 Some(datetime1)
468 } else {
469 Some(datetime2)
470 }
471 }
472 }
473 });
474 Ok(truncated.and_then(|value| value.timestamp_nanos_opt()))
475 } else {
476 _date_trunc_coarse::<NaiveDateTime>(granularity, None)?;
477 Ok(None)
478 }
479}
480
481fn _date_trunc_coarse_without_tz(
482 granularity: DateTruncGranularity,
483 value: Option<NaiveDateTime>,
484) -> Result<Option<i64>> {
485 let value = _date_trunc_coarse::<NaiveDateTime>(granularity, value)?;
486 Ok(value.and_then(|value| value.and_utc().timestamp_nanos_opt()))
487}
488
489fn date_trunc_coarse(
494 granularity: DateTruncGranularity,
495 value: i64,
496 tz: Option<Tz>,
497) -> Result<i64> {
498 let value = match tz {
499 Some(tz) => {
500 let value = as_datetime_with_timezone::<TimestampNanosecondType>(value, tz)
503 .ok_or(exec_datafusion_err!("Timestamp {value} out of range"))?;
504 _date_trunc_coarse_with_tz(granularity, Some(value))
505 }
506 None => {
507 let value = timestamp_ns_to_datetime(value)
509 .ok_or_else(|| exec_datafusion_err!("Timestamp {value} out of range"))?;
510 _date_trunc_coarse_without_tz(granularity, Some(value))
511 }
512 }?;
513
514 Ok(value.unwrap())
516}
517
518fn general_date_trunc_array_fine_granularity<T: ArrowTimestampType>(
525 tu: TimeUnit,
526 array: &PrimitiveArray<T>,
527 granularity: DateTruncGranularity,
528 tz_opt: Option<Arc<str>>,
529) -> Result<ArrayRef> {
530 let unit = match (tu, granularity) {
531 (Second, DateTruncGranularity::Minute) => NonZeroI64::new(60),
532 (Second, DateTruncGranularity::Hour) => NonZeroI64::new(3600),
533 (Second, DateTruncGranularity::Day) => NonZeroI64::new(86400),
534
535 (Millisecond, DateTruncGranularity::Second) => NonZeroI64::new(1_000),
536 (Millisecond, DateTruncGranularity::Minute) => NonZeroI64::new(60_000),
537 (Millisecond, DateTruncGranularity::Hour) => NonZeroI64::new(3_600_000),
538 (Millisecond, DateTruncGranularity::Day) => NonZeroI64::new(86_400_000),
539
540 (Microsecond, DateTruncGranularity::Millisecond) => NonZeroI64::new(1_000),
541 (Microsecond, DateTruncGranularity::Second) => NonZeroI64::new(1_000_000),
542 (Microsecond, DateTruncGranularity::Minute) => NonZeroI64::new(60_000_000),
543 (Microsecond, DateTruncGranularity::Hour) => NonZeroI64::new(3_600_000_000),
544 (Microsecond, DateTruncGranularity::Day) => NonZeroI64::new(86_400_000_000),
545
546 (Nanosecond, DateTruncGranularity::Microsecond) => NonZeroI64::new(1_000),
547 (Nanosecond, DateTruncGranularity::Millisecond) => NonZeroI64::new(1_000_000),
548 (Nanosecond, DateTruncGranularity::Second) => NonZeroI64::new(1_000_000_000),
549 (Nanosecond, DateTruncGranularity::Minute) => NonZeroI64::new(60_000_000_000),
550 (Nanosecond, DateTruncGranularity::Hour) => NonZeroI64::new(3_600_000_000_000),
551 (Nanosecond, DateTruncGranularity::Day) => NonZeroI64::new(86_400_000_000_000),
552 _ => None,
553 };
554
555 if let Some(unit) = unit {
556 let unit = unit.get();
557 let array = PrimitiveArray::<T>::from_iter_values_with_nulls(
558 array
559 .values()
560 .iter()
561 .map(|v| *v - i64::rem_euclid(*v, unit)),
562 array.nulls().cloned(),
563 )
564 .with_timezone_opt(tz_opt);
565 Ok(Arc::new(array))
566 } else {
567 Ok(Arc::new(array.clone()))
569 }
570}
571
572fn general_date_trunc(
574 tu: TimeUnit,
575 value: i64,
576 tz: Option<Tz>,
577 granularity: DateTruncGranularity,
578) -> Result<i64, DataFusionError> {
579 let scale = match tu {
580 Second => 1_000_000_000,
581 Millisecond => 1_000_000,
582 Microsecond => 1_000,
583 Nanosecond => 1,
584 };
585
586 let nano = date_trunc_coarse(granularity, scale * value, tz)?;
588
589 let result = match tu {
590 Second => match granularity {
591 DateTruncGranularity::Minute => nano / 1_000_000_000 / 60 * 60,
592 _ => nano / 1_000_000_000,
593 },
594 Millisecond => match granularity {
595 DateTruncGranularity::Minute => nano / 1_000_000 / 1_000 / 60 * 1_000 * 60,
596 DateTruncGranularity::Second => nano / 1_000_000 / 1_000 * 1_000,
597 _ => nano / 1_000_000,
598 },
599 Microsecond => match granularity {
600 DateTruncGranularity::Minute => {
601 nano / 1_000 / 1_000_000 / 60 * 60 * 1_000_000
602 }
603 DateTruncGranularity::Second => nano / 1_000 / 1_000_000 * 1_000_000,
604 DateTruncGranularity::Millisecond => nano / 1_000 / 1_000 * 1_000,
605 _ => nano / 1_000,
606 },
607 _ => match granularity {
608 DateTruncGranularity::Minute => {
609 nano / 1_000_000_000 / 60 * 1_000_000_000 * 60
610 }
611 DateTruncGranularity::Second => nano / 1_000_000_000 * 1_000_000_000,
612 DateTruncGranularity::Millisecond => nano / 1_000_000 * 1_000_000,
613 DateTruncGranularity::Microsecond => nano / 1_000 * 1_000,
614 _ => nano,
615 },
616 };
617 Ok(result)
618}
619
620fn parse_tz(tz: &Option<Arc<str>>) -> Result<Option<Tz>> {
621 tz.as_ref()
622 .map(|tz| {
623 Tz::from_str(tz)
624 .map_err(|op| exec_datafusion_err!("failed on timezone {tz}: {op:?}"))
625 })
626 .transpose()
627}
628
629#[cfg(test)]
630mod tests {
631 use std::sync::Arc;
632
633 use crate::datetime::date_trunc::{
634 DateTruncFunc, DateTruncGranularity, date_trunc_coarse,
635 };
636
637 use arrow::array::cast::as_primitive_array;
638 use arrow::array::types::TimestampNanosecondType;
639 use arrow::array::{Array, TimestampNanosecondArray};
640 use arrow::compute::kernels::cast_utils::string_to_timestamp_nanos;
641 use arrow::datatypes::{DataType, Field, TimeUnit};
642 use datafusion_common::ScalarValue;
643 use datafusion_common::config::ConfigOptions;
644 use datafusion_expr::{ColumnarValue, ScalarUDFImpl};
645
646 #[test]
647 fn date_trunc_test() {
648 let cases = vec![
649 (
650 "2020-09-08T13:42:29.190855Z",
651 "second",
652 "2020-09-08T13:42:29.000000Z",
653 ),
654 (
655 "2020-09-08T13:42:29.190855Z",
656 "minute",
657 "2020-09-08T13:42:00.000000Z",
658 ),
659 (
660 "2020-09-08T13:42:29.190855Z",
661 "hour",
662 "2020-09-08T13:00:00.000000Z",
663 ),
664 (
665 "2020-09-08T13:42:29.190855Z",
666 "day",
667 "2020-09-08T00:00:00.000000Z",
668 ),
669 (
670 "2020-09-08T13:42:29.190855Z",
671 "week",
672 "2020-09-07T00:00:00.000000Z",
673 ),
674 (
675 "2020-09-08T13:42:29.190855Z",
676 "month",
677 "2020-09-01T00:00:00.000000Z",
678 ),
679 (
680 "2020-09-08T13:42:29.190855Z",
681 "year",
682 "2020-01-01T00:00:00.000000Z",
683 ),
684 (
686 "2021-01-01T13:42:29.190855Z",
687 "week",
688 "2020-12-28T00:00:00.000000Z",
689 ),
690 (
691 "2020-01-01T13:42:29.190855Z",
692 "week",
693 "2019-12-30T00:00:00.000000Z",
694 ),
695 (
697 "2020-01-01T13:42:29.190855Z",
698 "quarter",
699 "2020-01-01T00:00:00.000000Z",
700 ),
701 (
702 "2020-02-01T13:42:29.190855Z",
703 "quarter",
704 "2020-01-01T00:00:00.000000Z",
705 ),
706 (
707 "2020-03-01T13:42:29.190855Z",
708 "quarter",
709 "2020-01-01T00:00:00.000000Z",
710 ),
711 (
712 "2020-04-01T13:42:29.190855Z",
713 "quarter",
714 "2020-04-01T00:00:00.000000Z",
715 ),
716 (
717 "2020-08-01T13:42:29.190855Z",
718 "quarter",
719 "2020-07-01T00:00:00.000000Z",
720 ),
721 (
722 "2020-11-01T13:42:29.190855Z",
723 "quarter",
724 "2020-10-01T00:00:00.000000Z",
725 ),
726 (
727 "2020-12-01T13:42:29.190855Z",
728 "quarter",
729 "2020-10-01T00:00:00.000000Z",
730 ),
731 ];
732
733 cases.iter().for_each(|(original, granularity, expected)| {
734 let left = string_to_timestamp_nanos(original).unwrap();
735 let right = string_to_timestamp_nanos(expected).unwrap();
736 let granularity_enum = DateTruncGranularity::from_str(granularity).unwrap();
737 let result = date_trunc_coarse(granularity_enum, left, None).unwrap();
738 assert_eq!(result, right, "{original} = {expected}");
739 });
740 }
741
742 #[test]
743 fn test_date_trunc_timezones() {
744 let cases = [
745 (
746 vec![
747 "2020-09-08T00:00:00Z",
748 "2020-09-08T01:00:00Z",
749 "2020-09-08T02:00:00Z",
750 "2020-09-08T03:00:00Z",
751 "2020-09-08T04:00:00Z",
752 ],
753 Some("+00".into()),
754 vec![
755 "2020-09-08T00:00:00Z",
756 "2020-09-08T00:00:00Z",
757 "2020-09-08T00:00:00Z",
758 "2020-09-08T00:00:00Z",
759 "2020-09-08T00:00:00Z",
760 ],
761 ),
762 (
763 vec![
764 "2020-09-08T00:00:00Z",
765 "2020-09-08T01:00:00Z",
766 "2020-09-08T02:00:00Z",
767 "2020-09-08T03:00:00Z",
768 "2020-09-08T04:00:00Z",
769 ],
770 None,
771 vec![
772 "2020-09-08T00:00:00Z",
773 "2020-09-08T00:00:00Z",
774 "2020-09-08T00:00:00Z",
775 "2020-09-08T00:00:00Z",
776 "2020-09-08T00:00:00Z",
777 ],
778 ),
779 (
780 vec![
781 "2020-09-08T00:00:00Z",
782 "2020-09-08T01:00:00Z",
783 "2020-09-08T02:00:00Z",
784 "2020-09-08T03:00:00Z",
785 "2020-09-08T04:00:00Z",
786 ],
787 Some("-02".into()),
788 vec![
789 "2020-09-07T02:00:00Z",
790 "2020-09-07T02:00:00Z",
791 "2020-09-08T02:00:00Z",
792 "2020-09-08T02:00:00Z",
793 "2020-09-08T02:00:00Z",
794 ],
795 ),
796 (
797 vec![
798 "2020-09-08T00:00:00+05",
799 "2020-09-08T01:00:00+05",
800 "2020-09-08T02:00:00+05",
801 "2020-09-08T03:00:00+05",
802 "2020-09-08T04:00:00+05",
803 ],
804 Some("+05".into()),
805 vec![
806 "2020-09-08T00:00:00+05",
807 "2020-09-08T00:00:00+05",
808 "2020-09-08T00:00:00+05",
809 "2020-09-08T00:00:00+05",
810 "2020-09-08T00:00:00+05",
811 ],
812 ),
813 (
814 vec![
815 "2020-09-08T00:00:00+08",
816 "2020-09-08T01:00:00+08",
817 "2020-09-08T02:00:00+08",
818 "2020-09-08T03:00:00+08",
819 "2020-09-08T04:00:00+08",
820 ],
821 Some("+08".into()),
822 vec![
823 "2020-09-08T00:00:00+08",
824 "2020-09-08T00:00:00+08",
825 "2020-09-08T00:00:00+08",
826 "2020-09-08T00:00:00+08",
827 "2020-09-08T00:00:00+08",
828 ],
829 ),
830 (
831 vec![
832 "2024-10-26T23:00:00Z",
833 "2024-10-27T00:00:00Z",
834 "2024-10-27T01:00:00Z",
835 "2024-10-27T02:00:00Z",
836 ],
837 Some("Europe/Berlin".into()),
838 vec![
839 "2024-10-27T00:00:00+02",
840 "2024-10-27T00:00:00+02",
841 "2024-10-27T00:00:00+02",
842 "2024-10-27T00:00:00+02",
843 ],
844 ),
845 (
846 vec![
847 "2018-02-18T00:00:00Z",
848 "2018-02-18T01:00:00Z",
849 "2018-02-18T02:00:00Z",
850 "2018-02-18T03:00:00Z",
851 "2018-11-04T01:00:00Z",
852 "2018-11-04T02:00:00Z",
853 "2018-11-04T03:00:00Z",
854 "2018-11-04T04:00:00Z",
855 ],
856 Some("America/Sao_Paulo".into()),
857 vec![
858 "2018-02-17T00:00:00-02",
859 "2018-02-17T00:00:00-02",
860 "2018-02-17T00:00:00-02",
861 "2018-02-18T00:00:00-03",
862 "2018-11-03T00:00:00-03",
863 "2018-11-03T00:00:00-03",
864 "2018-11-04T01:00:00-02",
865 "2018-11-04T01:00:00-02",
866 ],
867 ),
868 ];
869
870 cases.iter().for_each(|(original, tz_opt, expected)| {
871 let input = original
872 .iter()
873 .map(|s| Some(string_to_timestamp_nanos(s).unwrap()))
874 .collect::<TimestampNanosecondArray>()
875 .with_timezone_opt(tz_opt.clone());
876 let right = expected
877 .iter()
878 .map(|s| Some(string_to_timestamp_nanos(s).unwrap()))
879 .collect::<TimestampNanosecondArray>()
880 .with_timezone_opt(tz_opt.clone());
881 let batch_len = input.len();
882 let arg_fields = vec![
883 Field::new("a", DataType::Utf8, false).into(),
884 Field::new("b", input.data_type().clone(), false).into(),
885 ];
886 let args = datafusion_expr::ScalarFunctionArgs {
887 args: vec![
888 ColumnarValue::Scalar(ScalarValue::from("day")),
889 ColumnarValue::Array(Arc::new(input)),
890 ],
891 arg_fields,
892 number_rows: batch_len,
893 return_field: Field::new(
894 "f",
895 DataType::Timestamp(TimeUnit::Nanosecond, tz_opt.clone()),
896 true,
897 )
898 .into(),
899 config_options: Arc::new(ConfigOptions::default()),
900 };
901 let result = DateTruncFunc::new().invoke_with_args(args).unwrap();
902 if let ColumnarValue::Array(result) = result {
903 assert_eq!(
904 result.data_type(),
905 &DataType::Timestamp(TimeUnit::Nanosecond, tz_opt.clone())
906 );
907 let left = as_primitive_array::<TimestampNanosecondType>(&result);
908 assert_eq!(left, &right);
909 } else {
910 panic!("unexpected column type");
911 }
912 });
913 }
914
915 #[test]
916 fn test_date_trunc_hour_timezones() {
917 let cases = [
918 (
919 vec![
920 "2020-09-08T00:30:00Z",
921 "2020-09-08T01:30:00Z",
922 "2020-09-08T02:30:00Z",
923 "2020-09-08T03:30:00Z",
924 "2020-09-08T04:30:00Z",
925 ],
926 Some("+00".into()),
927 vec![
928 "2020-09-08T00:00:00Z",
929 "2020-09-08T01:00:00Z",
930 "2020-09-08T02:00:00Z",
931 "2020-09-08T03:00:00Z",
932 "2020-09-08T04:00:00Z",
933 ],
934 ),
935 (
936 vec![
937 "2020-09-08T00:30:00Z",
938 "2020-09-08T01:30:00Z",
939 "2020-09-08T02:30:00Z",
940 "2020-09-08T03:30:00Z",
941 "2020-09-08T04:30:00Z",
942 ],
943 None,
944 vec![
945 "2020-09-08T00:00:00Z",
946 "2020-09-08T01:00:00Z",
947 "2020-09-08T02:00:00Z",
948 "2020-09-08T03:00:00Z",
949 "2020-09-08T04:00:00Z",
950 ],
951 ),
952 (
953 vec![
954 "2020-09-08T00:30:00Z",
955 "2020-09-08T01:30:00Z",
956 "2020-09-08T02:30:00Z",
957 "2020-09-08T03:30:00Z",
958 "2020-09-08T04:30:00Z",
959 ],
960 Some("-02".into()),
961 vec![
962 "2020-09-08T00:00:00Z",
963 "2020-09-08T01:00:00Z",
964 "2020-09-08T02:00:00Z",
965 "2020-09-08T03:00:00Z",
966 "2020-09-08T04:00:00Z",
967 ],
968 ),
969 (
970 vec![
971 "2020-09-08T00:30:00+05",
972 "2020-09-08T01:30:00+05",
973 "2020-09-08T02:30:00+05",
974 "2020-09-08T03:30:00+05",
975 "2020-09-08T04:30:00+05",
976 ],
977 Some("+05".into()),
978 vec![
979 "2020-09-08T00:00:00+05",
980 "2020-09-08T01:00:00+05",
981 "2020-09-08T02:00:00+05",
982 "2020-09-08T03:00:00+05",
983 "2020-09-08T04:00:00+05",
984 ],
985 ),
986 (
987 vec![
988 "2020-09-08T00:30:00+08",
989 "2020-09-08T01:30:00+08",
990 "2020-09-08T02:30:00+08",
991 "2020-09-08T03:30:00+08",
992 "2020-09-08T04:30:00+08",
993 ],
994 Some("+08".into()),
995 vec![
996 "2020-09-08T00:00:00+08",
997 "2020-09-08T01:00:00+08",
998 "2020-09-08T02:00:00+08",
999 "2020-09-08T03:00:00+08",
1000 "2020-09-08T04:00:00+08",
1001 ],
1002 ),
1003 (
1004 vec![
1005 "2024-10-26T23:30:00Z",
1006 "2024-10-27T00:30:00Z",
1007 "2024-10-27T01:30:00Z",
1008 "2024-10-27T02:30:00Z",
1009 ],
1010 Some("Europe/Berlin".into()),
1011 vec![
1012 "2024-10-27T01:00:00+02",
1013 "2024-10-27T02:00:00+02",
1014 "2024-10-27T02:00:00+01",
1015 "2024-10-27T03:00:00+01",
1016 ],
1017 ),
1018 (
1019 vec![
1020 "2018-02-18T00:30:00Z",
1021 "2018-02-18T01:30:00Z",
1022 "2018-02-18T02:30:00Z",
1023 "2018-02-18T03:30:00Z",
1024 "2018-11-04T01:00:00Z",
1025 "2018-11-04T02:00:00Z",
1026 "2018-11-04T03:00:00Z",
1027 "2018-11-04T04:00:00Z",
1028 ],
1029 Some("America/Sao_Paulo".into()),
1030 vec![
1031 "2018-02-17T22:00:00-02",
1032 "2018-02-17T23:00:00-02",
1033 "2018-02-17T23:00:00-03",
1034 "2018-02-18T00:00:00-03",
1035 "2018-11-03T22:00:00-03",
1036 "2018-11-03T23:00:00-03",
1037 "2018-11-04T01:00:00-02",
1038 "2018-11-04T02:00:00-02",
1039 ],
1040 ),
1041 (
1042 vec![
1043 "2024-10-26T23:30:00Z",
1044 "2024-10-27T00:30:00Z",
1045 "2024-10-27T01:30:00Z",
1046 "2024-10-27T02:30:00Z",
1047 ],
1048 Some("Asia/Kathmandu".into()), vec![
1050 "2024-10-27T05:00:00+05:45",
1051 "2024-10-27T06:00:00+05:45",
1052 "2024-10-27T07:00:00+05:45",
1053 "2024-10-27T08:00:00+05:45",
1054 ],
1055 ),
1056 ];
1057
1058 cases.iter().for_each(|(original, tz_opt, expected)| {
1059 let input = original
1060 .iter()
1061 .map(|s| Some(string_to_timestamp_nanos(s).unwrap()))
1062 .collect::<TimestampNanosecondArray>()
1063 .with_timezone_opt(tz_opt.clone());
1064 let right = expected
1065 .iter()
1066 .map(|s| Some(string_to_timestamp_nanos(s).unwrap()))
1067 .collect::<TimestampNanosecondArray>()
1068 .with_timezone_opt(tz_opt.clone());
1069 let batch_len = input.len();
1070 let arg_fields = vec![
1071 Field::new("a", DataType::Utf8, false).into(),
1072 Field::new("b", input.data_type().clone(), false).into(),
1073 ];
1074 let args = datafusion_expr::ScalarFunctionArgs {
1075 args: vec![
1076 ColumnarValue::Scalar(ScalarValue::from("hour")),
1077 ColumnarValue::Array(Arc::new(input)),
1078 ],
1079 arg_fields,
1080 number_rows: batch_len,
1081 return_field: Field::new(
1082 "f",
1083 DataType::Timestamp(TimeUnit::Nanosecond, tz_opt.clone()),
1084 true,
1085 )
1086 .into(),
1087 config_options: Arc::new(ConfigOptions::default()),
1088 };
1089 let result = DateTruncFunc::new().invoke_with_args(args).unwrap();
1090 if let ColumnarValue::Array(result) = result {
1091 assert_eq!(
1092 result.data_type(),
1093 &DataType::Timestamp(TimeUnit::Nanosecond, tz_opt.clone())
1094 );
1095 let left = as_primitive_array::<TimestampNanosecondType>(&result);
1096 assert_eq!(left, &right);
1097 } else {
1098 panic!("unexpected column type");
1099 }
1100 });
1101 }
1102
1103 #[test]
1104 fn test_date_trunc_fine_granularity_timezones() {
1105 let cases = [
1106 (
1108 vec![
1109 "2020-09-08T13:42:29.190855Z",
1110 "2020-09-08T13:42:30.500000Z",
1111 "2020-09-08T13:42:31.999999Z",
1112 ],
1113 Some("+00".into()),
1114 "second",
1115 vec![
1116 "2020-09-08T13:42:29.000000Z",
1117 "2020-09-08T13:42:30.000000Z",
1118 "2020-09-08T13:42:31.000000Z",
1119 ],
1120 ),
1121 (
1122 vec![
1123 "2020-09-08T13:42:29.190855+05",
1124 "2020-09-08T13:42:30.500000+05",
1125 "2020-09-08T13:42:31.999999+05",
1126 ],
1127 Some("+05".into()),
1128 "second",
1129 vec![
1130 "2020-09-08T13:42:29.000000+05",
1131 "2020-09-08T13:42:30.000000+05",
1132 "2020-09-08T13:42:31.000000+05",
1133 ],
1134 ),
1135 (
1136 vec![
1137 "2020-09-08T13:42:29.190855Z",
1138 "2020-09-08T13:42:30.500000Z",
1139 "2020-09-08T13:42:31.999999Z",
1140 ],
1141 Some("Europe/Berlin".into()),
1142 "second",
1143 vec![
1144 "2020-09-08T13:42:29.000000Z",
1145 "2020-09-08T13:42:30.000000Z",
1146 "2020-09-08T13:42:31.000000Z",
1147 ],
1148 ),
1149 (
1151 vec![
1152 "2020-09-08T13:42:29.190855Z",
1153 "2020-09-08T13:43:30.500000Z",
1154 "2020-09-08T13:44:31.999999Z",
1155 ],
1156 Some("+00".into()),
1157 "minute",
1158 vec![
1159 "2020-09-08T13:42:00.000000Z",
1160 "2020-09-08T13:43:00.000000Z",
1161 "2020-09-08T13:44:00.000000Z",
1162 ],
1163 ),
1164 (
1165 vec![
1166 "2020-09-08T13:42:29.190855+08",
1167 "2020-09-08T13:43:30.500000+08",
1168 "2020-09-08T13:44:31.999999+08",
1169 ],
1170 Some("+08".into()),
1171 "minute",
1172 vec![
1173 "2020-09-08T13:42:00.000000+08",
1174 "2020-09-08T13:43:00.000000+08",
1175 "2020-09-08T13:44:00.000000+08",
1176 ],
1177 ),
1178 (
1179 vec![
1180 "2020-09-08T13:42:29.190855Z",
1181 "2020-09-08T13:43:30.500000Z",
1182 "2020-09-08T13:44:31.999999Z",
1183 ],
1184 Some("America/Sao_Paulo".into()),
1185 "minute",
1186 vec![
1187 "2020-09-08T13:42:00.000000Z",
1188 "2020-09-08T13:43:00.000000Z",
1189 "2020-09-08T13:44:00.000000Z",
1190 ],
1191 ),
1192 (
1194 vec![
1195 "2020-09-08T13:42:29.190855Z",
1196 "2020-09-08T13:43:30.500000Z",
1197 "2020-09-08T13:44:31.999999Z",
1198 ],
1199 None,
1200 "minute",
1201 vec![
1202 "2020-09-08T13:42:00.000000Z",
1203 "2020-09-08T13:43:00.000000Z",
1204 "2020-09-08T13:44:00.000000Z",
1205 ],
1206 ),
1207 (
1209 vec![
1210 "2020-09-08T13:42:29.190855Z",
1211 "2020-09-08T13:42:29.191999Z",
1212 "2020-09-08T13:42:29.192500Z",
1213 ],
1214 Some("Asia/Kolkata".into()),
1215 "millisecond",
1216 vec![
1217 "2020-09-08T19:12:29.190000+05:30",
1218 "2020-09-08T19:12:29.191000+05:30",
1219 "2020-09-08T19:12:29.192000+05:30",
1220 ],
1221 ),
1222 ];
1223
1224 cases
1225 .iter()
1226 .for_each(|(original, tz_opt, granularity, expected)| {
1227 let input = original
1228 .iter()
1229 .map(|s| Some(string_to_timestamp_nanos(s).unwrap()))
1230 .collect::<TimestampNanosecondArray>()
1231 .with_timezone_opt(tz_opt.clone());
1232 let right = expected
1233 .iter()
1234 .map(|s| Some(string_to_timestamp_nanos(s).unwrap()))
1235 .collect::<TimestampNanosecondArray>()
1236 .with_timezone_opt(tz_opt.clone());
1237 let batch_len = input.len();
1238 let arg_fields = vec![
1239 Field::new("a", DataType::Utf8, false).into(),
1240 Field::new("b", input.data_type().clone(), false).into(),
1241 ];
1242 let args = datafusion_expr::ScalarFunctionArgs {
1243 args: vec![
1244 ColumnarValue::Scalar(ScalarValue::from(*granularity)),
1245 ColumnarValue::Array(Arc::new(input)),
1246 ],
1247 arg_fields,
1248 number_rows: batch_len,
1249 return_field: Field::new(
1250 "f",
1251 DataType::Timestamp(TimeUnit::Nanosecond, tz_opt.clone()),
1252 true,
1253 )
1254 .into(),
1255 config_options: Arc::new(ConfigOptions::default()),
1256 };
1257 let result = DateTruncFunc::new().invoke_with_args(args).unwrap();
1258 if let ColumnarValue::Array(result) = result {
1259 assert_eq!(
1260 result.data_type(),
1261 &DataType::Timestamp(TimeUnit::Nanosecond, tz_opt.clone()),
1262 "Failed for granularity: {granularity}, timezone: {tz_opt:?}"
1263 );
1264 let left = as_primitive_array::<TimestampNanosecondType>(&result);
1265 assert_eq!(
1266 left, &right,
1267 "Failed for granularity: {granularity}, timezone: {tz_opt:?}"
1268 );
1269 } else {
1270 panic!("unexpected column type");
1271 }
1272 });
1273 }
1274}