1use std::any::Any;
19use std::num::NonZeroI64;
20use std::ops::{Add, Sub};
21use std::str::FromStr;
22use std::sync::Arc;
23
24use arrow::array::temporal_conversions::{
25 as_datetime_with_timezone, timestamp_ns_to_datetime,
26};
27use arrow::array::timezone::Tz;
28use arrow::array::types::{
29 ArrowTimestampType, TimestampMicrosecondType, TimestampMillisecondType,
30 TimestampNanosecondType, TimestampSecondType,
31};
32use arrow::array::{Array, ArrayRef, PrimitiveArray};
33use arrow::datatypes::DataType::{self, Null, Timestamp, Utf8, Utf8View};
34use arrow::datatypes::TimeUnit::{self, Microsecond, Millisecond, Nanosecond, Second};
35use datafusion_common::cast::as_primitive_array;
36use datafusion_common::{
37 exec_datafusion_err, exec_err, plan_err, DataFusionError, Result, ScalarValue,
38};
39use datafusion_expr::sort_properties::{ExprProperties, SortProperties};
40use datafusion_expr::TypeSignature::Exact;
41use datafusion_expr::{
42 ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility, TIMEZONE_WILDCARD,
43};
44use datafusion_macros::user_doc;
45
46use chrono::{
47 DateTime, Datelike, Duration, LocalResult, NaiveDateTime, Offset, TimeDelta, Timelike,
48};
49
50#[derive(Debug, Clone, Copy, PartialEq, Eq)]
52enum DateTruncGranularity {
53 Microsecond,
54 Millisecond,
55 Second,
56 Minute,
57 Hour,
58 Day,
59 Week,
60 Month,
61 Quarter,
62 Year,
63}
64
65impl DateTruncGranularity {
66 const SUPPORTED_GRANULARITIES: &[&str] = &[
70 "microsecond",
71 "millisecond",
72 "second",
73 "minute",
74 "hour",
75 "day",
76 "week",
77 "month",
78 "quarter",
79 "year",
80 ];
81
82 fn from_str(s: &str) -> Result<Self> {
84 match s.to_lowercase().as_str() {
86 "microsecond" => Ok(Self::Microsecond),
87 "millisecond" => Ok(Self::Millisecond),
88 "second" => Ok(Self::Second),
89 "minute" => Ok(Self::Minute),
90 "hour" => Ok(Self::Hour),
91 "day" => Ok(Self::Day),
92 "week" => Ok(Self::Week),
93 "month" => Ok(Self::Month),
94 "quarter" => Ok(Self::Quarter),
95 "year" => Ok(Self::Year),
96 _ => {
97 let supported = Self::SUPPORTED_GRANULARITIES.join(", ");
98 exec_err!(
99 "Unsupported date_trunc granularity: '{s}'. Supported values are: {supported}"
100 )
101 }
102 }
103 }
104
105 fn is_fine_granularity(&self) -> bool {
108 matches!(
109 self,
110 Self::Second | Self::Minute | Self::Millisecond | Self::Microsecond
111 )
112 }
113
114 fn is_fine_granularity_utc(&self) -> bool {
117 self.is_fine_granularity() || matches!(self, Self::Hour | Self::Day)
118 }
119}
120
121#[user_doc(
122 doc_section(label = "Time and Date Functions"),
123 description = "Truncates a timestamp value to a specified precision.",
124 syntax_example = "date_trunc(precision, expression)",
125 argument(
126 name = "precision",
127 description = r#"Time precision to truncate to. The following precisions are supported:
128
129 - year / YEAR
130 - quarter / QUARTER
131 - month / MONTH
132 - week / WEEK
133 - day / DAY
134 - hour / HOUR
135 - minute / MINUTE
136 - second / SECOND
137 - millisecond / MILLISECOND
138 - microsecond / MICROSECOND
139"#
140 ),
141 argument(
142 name = "expression",
143 description = "Time expression to operate on. Can be a constant, column, or function."
144 )
145)]
146#[derive(Debug, PartialEq, Eq, Hash)]
147pub struct DateTruncFunc {
148 signature: Signature,
149 aliases: Vec<String>,
150}
151
152impl Default for DateTruncFunc {
153 fn default() -> Self {
154 Self::new()
155 }
156}
157
158impl DateTruncFunc {
159 pub fn new() -> Self {
160 Self {
161 signature: Signature::one_of(
162 vec![
163 Exact(vec![Utf8, Timestamp(Nanosecond, None)]),
164 Exact(vec![Utf8View, Timestamp(Nanosecond, None)]),
165 Exact(vec![
166 Utf8,
167 Timestamp(Nanosecond, Some(TIMEZONE_WILDCARD.into())),
168 ]),
169 Exact(vec![
170 Utf8View,
171 Timestamp(Nanosecond, Some(TIMEZONE_WILDCARD.into())),
172 ]),
173 Exact(vec![Utf8, Timestamp(Microsecond, None)]),
174 Exact(vec![Utf8View, Timestamp(Microsecond, None)]),
175 Exact(vec![
176 Utf8,
177 Timestamp(Microsecond, Some(TIMEZONE_WILDCARD.into())),
178 ]),
179 Exact(vec![
180 Utf8View,
181 Timestamp(Microsecond, Some(TIMEZONE_WILDCARD.into())),
182 ]),
183 Exact(vec![Utf8, Timestamp(Millisecond, None)]),
184 Exact(vec![Utf8View, Timestamp(Millisecond, None)]),
185 Exact(vec![
186 Utf8,
187 Timestamp(Millisecond, Some(TIMEZONE_WILDCARD.into())),
188 ]),
189 Exact(vec![
190 Utf8View,
191 Timestamp(Millisecond, Some(TIMEZONE_WILDCARD.into())),
192 ]),
193 Exact(vec![Utf8, Timestamp(Second, None)]),
194 Exact(vec![Utf8View, Timestamp(Second, None)]),
195 Exact(vec![
196 Utf8,
197 Timestamp(Second, Some(TIMEZONE_WILDCARD.into())),
198 ]),
199 Exact(vec![
200 Utf8View,
201 Timestamp(Second, Some(TIMEZONE_WILDCARD.into())),
202 ]),
203 ],
204 Volatility::Immutable,
205 ),
206 aliases: vec![String::from("datetrunc")],
207 }
208 }
209}
210
211impl ScalarUDFImpl for DateTruncFunc {
212 fn as_any(&self) -> &dyn Any {
213 self
214 }
215
216 fn name(&self) -> &str {
217 "date_trunc"
218 }
219
220 fn signature(&self) -> &Signature {
221 &self.signature
222 }
223
224 fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
225 match &arg_types[1] {
226 Timestamp(Nanosecond, None) | Utf8 | DataType::Date32 | Null => {
227 Ok(Timestamp(Nanosecond, None))
228 }
229 Timestamp(Nanosecond, tz_opt) => Ok(Timestamp(Nanosecond, tz_opt.clone())),
230 Timestamp(Microsecond, tz_opt) => Ok(Timestamp(Microsecond, tz_opt.clone())),
231 Timestamp(Millisecond, tz_opt) => Ok(Timestamp(Millisecond, tz_opt.clone())),
232 Timestamp(Second, tz_opt) => Ok(Timestamp(Second, tz_opt.clone())),
233 _ => plan_err!(
234 "The date_trunc function can only accept timestamp as the second arg."
235 ),
236 }
237 }
238
239 fn invoke_with_args(
240 &self,
241 args: datafusion_expr::ScalarFunctionArgs,
242 ) -> Result<ColumnarValue> {
243 let args = args.args;
244 let (granularity, array) = (&args[0], &args[1]);
245
246 let granularity_str = if let ColumnarValue::Scalar(ScalarValue::Utf8(Some(v))) =
247 granularity
248 {
249 v.to_lowercase()
250 } else if let ColumnarValue::Scalar(ScalarValue::Utf8View(Some(v))) = granularity
251 {
252 v.to_lowercase()
253 } else {
254 return exec_err!("Granularity of `date_trunc` must be non-null scalar Utf8");
255 };
256
257 let granularity = DateTruncGranularity::from_str(&granularity_str)?;
258
259 fn process_array<T: ArrowTimestampType>(
260 array: &dyn Array,
261 granularity: DateTruncGranularity,
262 tz_opt: &Option<Arc<str>>,
263 ) -> Result<ColumnarValue> {
264 let parsed_tz = parse_tz(tz_opt)?;
265 let array = as_primitive_array::<T>(array)?;
266
267 if granularity.is_fine_granularity()
273 || (parsed_tz.is_none() && granularity.is_fine_granularity_utc())
274 {
275 let result = general_date_trunc_array_fine_granularity(
276 T::UNIT,
277 array,
278 granularity,
279 tz_opt.clone(),
280 )?;
281 return Ok(ColumnarValue::Array(result));
282 }
283
284 let array: PrimitiveArray<T> = array
285 .try_unary(|x| general_date_trunc(T::UNIT, x, parsed_tz, granularity))?
286 .with_timezone_opt(tz_opt.clone());
287 Ok(ColumnarValue::Array(Arc::new(array)))
288 }
289
290 fn process_scalar<T: ArrowTimestampType>(
291 v: &Option<i64>,
292 granularity: DateTruncGranularity,
293 tz_opt: &Option<Arc<str>>,
294 ) -> Result<ColumnarValue> {
295 let parsed_tz = parse_tz(tz_opt)?;
296 let value = if let Some(v) = v {
297 Some(general_date_trunc(T::UNIT, *v, parsed_tz, granularity)?)
298 } else {
299 None
300 };
301 let value = ScalarValue::new_timestamp::<T>(value, tz_opt.clone());
302 Ok(ColumnarValue::Scalar(value))
303 }
304
305 Ok(match array {
306 ColumnarValue::Scalar(ScalarValue::TimestampNanosecond(v, tz_opt)) => {
307 process_scalar::<TimestampNanosecondType>(v, granularity, tz_opt)?
308 }
309 ColumnarValue::Scalar(ScalarValue::TimestampMicrosecond(v, tz_opt)) => {
310 process_scalar::<TimestampMicrosecondType>(v, granularity, tz_opt)?
311 }
312 ColumnarValue::Scalar(ScalarValue::TimestampMillisecond(v, tz_opt)) => {
313 process_scalar::<TimestampMillisecondType>(v, granularity, tz_opt)?
314 }
315 ColumnarValue::Scalar(ScalarValue::TimestampSecond(v, tz_opt)) => {
316 process_scalar::<TimestampSecondType>(v, granularity, tz_opt)?
317 }
318 ColumnarValue::Array(array) => {
319 let array_type = array.data_type();
320 if let Timestamp(unit, tz_opt) = array_type {
321 match unit {
322 Second => process_array::<TimestampSecondType>(
323 array,
324 granularity,
325 tz_opt,
326 )?,
327 Millisecond => process_array::<TimestampMillisecondType>(
328 array,
329 granularity,
330 tz_opt,
331 )?,
332 Microsecond => process_array::<TimestampMicrosecondType>(
333 array,
334 granularity,
335 tz_opt,
336 )?,
337 Nanosecond => process_array::<TimestampNanosecondType>(
338 array,
339 granularity,
340 tz_opt,
341 )?,
342 }
343 } else {
344 return exec_err!("second argument of `date_trunc` is an unsupported array type: {array_type}");
345 }
346 }
347 _ => {
348 return exec_err!(
349 "second argument of `date_trunc` must be timestamp scalar or array"
350 );
351 }
352 })
353 }
354
355 fn aliases(&self) -> &[String] {
356 &self.aliases
357 }
358
359 fn output_ordering(&self, input: &[ExprProperties]) -> Result<SortProperties> {
360 let precision = &input[0];
362 let date_value = &input[1];
363
364 if precision.sort_properties.eq(&SortProperties::Singleton) {
365 Ok(date_value.sort_properties)
366 } else {
367 Ok(SortProperties::Unordered)
368 }
369 }
370 fn documentation(&self) -> Option<&Documentation> {
371 self.doc()
372 }
373}
374
375fn _date_trunc_coarse<T>(
376 granularity: DateTruncGranularity,
377 value: Option<T>,
378) -> Result<Option<T>>
379where
380 T: Datelike + Timelike + Sub<Duration, Output = T> + Copy,
381{
382 let value = match granularity {
383 DateTruncGranularity::Millisecond => value,
384 DateTruncGranularity::Microsecond => value,
385 DateTruncGranularity::Second => value.and_then(|d| d.with_nanosecond(0)),
386 DateTruncGranularity::Minute => value
387 .and_then(|d| d.with_nanosecond(0))
388 .and_then(|d| d.with_second(0)),
389 DateTruncGranularity::Hour => value
390 .and_then(|d| d.with_nanosecond(0))
391 .and_then(|d| d.with_second(0))
392 .and_then(|d| d.with_minute(0)),
393 DateTruncGranularity::Day => value
394 .and_then(|d| d.with_nanosecond(0))
395 .and_then(|d| d.with_second(0))
396 .and_then(|d| d.with_minute(0))
397 .and_then(|d| d.with_hour(0)),
398 DateTruncGranularity::Week => value
399 .and_then(|d| d.with_nanosecond(0))
400 .and_then(|d| d.with_second(0))
401 .and_then(|d| d.with_minute(0))
402 .and_then(|d| d.with_hour(0))
403 .map(|d| {
404 d - TimeDelta::try_seconds(60 * 60 * 24 * d.weekday() as i64).unwrap()
405 }),
406 DateTruncGranularity::Month => value
407 .and_then(|d| d.with_nanosecond(0))
408 .and_then(|d| d.with_second(0))
409 .and_then(|d| d.with_minute(0))
410 .and_then(|d| d.with_hour(0))
411 .and_then(|d| d.with_day0(0)),
412 DateTruncGranularity::Quarter => value
413 .and_then(|d| d.with_nanosecond(0))
414 .and_then(|d| d.with_second(0))
415 .and_then(|d| d.with_minute(0))
416 .and_then(|d| d.with_hour(0))
417 .and_then(|d| d.with_day0(0))
418 .and_then(|d| d.with_month(quarter_month(&d))),
419 DateTruncGranularity::Year => value
420 .and_then(|d| d.with_nanosecond(0))
421 .and_then(|d| d.with_second(0))
422 .and_then(|d| d.with_minute(0))
423 .and_then(|d| d.with_hour(0))
424 .and_then(|d| d.with_day0(0))
425 .and_then(|d| d.with_month0(0)),
426 };
427 Ok(value)
428}
429
430fn quarter_month<T>(date: &T) -> u32
431where
432 T: Datelike,
433{
434 1 + 3 * ((date.month() - 1) / 3)
435}
436
437fn _date_trunc_coarse_with_tz(
438 granularity: DateTruncGranularity,
439 value: Option<DateTime<Tz>>,
440) -> Result<Option<i64>> {
441 if let Some(value) = value {
442 let local = value.naive_local();
443 let truncated = _date_trunc_coarse::<NaiveDateTime>(granularity, Some(local))?;
444 let truncated = truncated.and_then(|truncated| {
445 match truncated.and_local_timezone(value.timezone()) {
446 LocalResult::None => {
447 truncated
453 .sub(TimeDelta::try_hours(3).unwrap())
454 .and_local_timezone(value.timezone())
455 .single()
456 .map(|v| v.add(TimeDelta::try_hours(3).unwrap()))
457 }
458 LocalResult::Single(datetime) => Some(datetime),
459 LocalResult::Ambiguous(datetime1, datetime2) => {
460 if datetime1.offset().fix() == value.offset().fix() {
465 Some(datetime1)
466 } else {
467 Some(datetime2)
468 }
469 }
470 }
471 });
472 Ok(truncated.and_then(|value| value.timestamp_nanos_opt()))
473 } else {
474 _date_trunc_coarse::<NaiveDateTime>(granularity, None)?;
475 Ok(None)
476 }
477}
478
479fn _date_trunc_coarse_without_tz(
480 granularity: DateTruncGranularity,
481 value: Option<NaiveDateTime>,
482) -> Result<Option<i64>> {
483 let value = _date_trunc_coarse::<NaiveDateTime>(granularity, value)?;
484 Ok(value.and_then(|value| value.and_utc().timestamp_nanos_opt()))
485}
486
487fn date_trunc_coarse(
492 granularity: DateTruncGranularity,
493 value: i64,
494 tz: Option<Tz>,
495) -> Result<i64> {
496 let value = match tz {
497 Some(tz) => {
498 let value = as_datetime_with_timezone::<TimestampNanosecondType>(value, tz)
501 .ok_or(exec_datafusion_err!("Timestamp {value} out of range"))?;
502 _date_trunc_coarse_with_tz(granularity, Some(value))
503 }
504 None => {
505 let value = timestamp_ns_to_datetime(value)
507 .ok_or_else(|| exec_datafusion_err!("Timestamp {value} out of range"))?;
508 _date_trunc_coarse_without_tz(granularity, Some(value))
509 }
510 }?;
511
512 Ok(value.unwrap())
514}
515
516fn general_date_trunc_array_fine_granularity<T: ArrowTimestampType>(
523 tu: TimeUnit,
524 array: &PrimitiveArray<T>,
525 granularity: DateTruncGranularity,
526 tz_opt: Option<Arc<str>>,
527) -> Result<ArrayRef> {
528 let unit = match (tu, granularity) {
529 (Second, DateTruncGranularity::Minute) => NonZeroI64::new(60),
530 (Second, DateTruncGranularity::Hour) => NonZeroI64::new(3600),
531 (Second, DateTruncGranularity::Day) => NonZeroI64::new(86400),
532
533 (Millisecond, DateTruncGranularity::Second) => NonZeroI64::new(1_000),
534 (Millisecond, DateTruncGranularity::Minute) => NonZeroI64::new(60_000),
535 (Millisecond, DateTruncGranularity::Hour) => NonZeroI64::new(3_600_000),
536 (Millisecond, DateTruncGranularity::Day) => NonZeroI64::new(86_400_000),
537
538 (Microsecond, DateTruncGranularity::Millisecond) => NonZeroI64::new(1_000),
539 (Microsecond, DateTruncGranularity::Second) => NonZeroI64::new(1_000_000),
540 (Microsecond, DateTruncGranularity::Minute) => NonZeroI64::new(60_000_000),
541 (Microsecond, DateTruncGranularity::Hour) => NonZeroI64::new(3_600_000_000),
542 (Microsecond, DateTruncGranularity::Day) => NonZeroI64::new(86_400_000_000),
543
544 (Nanosecond, DateTruncGranularity::Microsecond) => NonZeroI64::new(1_000),
545 (Nanosecond, DateTruncGranularity::Millisecond) => NonZeroI64::new(1_000_000),
546 (Nanosecond, DateTruncGranularity::Second) => NonZeroI64::new(1_000_000_000),
547 (Nanosecond, DateTruncGranularity::Minute) => NonZeroI64::new(60_000_000_000),
548 (Nanosecond, DateTruncGranularity::Hour) => NonZeroI64::new(3_600_000_000_000),
549 (Nanosecond, DateTruncGranularity::Day) => NonZeroI64::new(86_400_000_000_000),
550 _ => None,
551 };
552
553 if let Some(unit) = unit {
554 let unit = unit.get();
555 let array = PrimitiveArray::<T>::from_iter_values_with_nulls(
556 array
557 .values()
558 .iter()
559 .map(|v| *v - i64::rem_euclid(*v, unit)),
560 array.nulls().cloned(),
561 )
562 .with_timezone_opt(tz_opt);
563 Ok(Arc::new(array))
564 } else {
565 Ok(Arc::new(array.clone()))
567 }
568}
569
570fn general_date_trunc(
572 tu: TimeUnit,
573 value: i64,
574 tz: Option<Tz>,
575 granularity: DateTruncGranularity,
576) -> Result<i64, DataFusionError> {
577 let scale = match tu {
578 Second => 1_000_000_000,
579 Millisecond => 1_000_000,
580 Microsecond => 1_000,
581 Nanosecond => 1,
582 };
583
584 let nano = date_trunc_coarse(granularity, scale * value, tz)?;
586
587 let result = match tu {
588 Second => match granularity {
589 DateTruncGranularity::Minute => nano / 1_000_000_000 / 60 * 60,
590 _ => nano / 1_000_000_000,
591 },
592 Millisecond => match granularity {
593 DateTruncGranularity::Minute => nano / 1_000_000 / 1_000 / 60 * 1_000 * 60,
594 DateTruncGranularity::Second => nano / 1_000_000 / 1_000 * 1_000,
595 _ => nano / 1_000_000,
596 },
597 Microsecond => match granularity {
598 DateTruncGranularity::Minute => {
599 nano / 1_000 / 1_000_000 / 60 * 60 * 1_000_000
600 }
601 DateTruncGranularity::Second => nano / 1_000 / 1_000_000 * 1_000_000,
602 DateTruncGranularity::Millisecond => nano / 1_000 / 1_000 * 1_000,
603 _ => nano / 1_000,
604 },
605 _ => match granularity {
606 DateTruncGranularity::Minute => {
607 nano / 1_000_000_000 / 60 * 1_000_000_000 * 60
608 }
609 DateTruncGranularity::Second => nano / 1_000_000_000 * 1_000_000_000,
610 DateTruncGranularity::Millisecond => nano / 1_000_000 * 1_000_000,
611 DateTruncGranularity::Microsecond => nano / 1_000 * 1_000,
612 _ => nano,
613 },
614 };
615 Ok(result)
616}
617
618fn parse_tz(tz: &Option<Arc<str>>) -> Result<Option<Tz>> {
619 tz.as_ref()
620 .map(|tz| {
621 Tz::from_str(tz)
622 .map_err(|op| exec_datafusion_err!("failed on timezone {tz}: {op:?}"))
623 })
624 .transpose()
625}
626
627#[cfg(test)]
628mod tests {
629 use std::sync::Arc;
630
631 use crate::datetime::date_trunc::{
632 date_trunc_coarse, DateTruncFunc, DateTruncGranularity,
633 };
634
635 use arrow::array::cast::as_primitive_array;
636 use arrow::array::types::TimestampNanosecondType;
637 use arrow::array::{Array, TimestampNanosecondArray};
638 use arrow::compute::kernels::cast_utils::string_to_timestamp_nanos;
639 use arrow::datatypes::{DataType, Field, TimeUnit};
640 use datafusion_common::config::ConfigOptions;
641 use datafusion_common::ScalarValue;
642 use datafusion_expr::{ColumnarValue, ScalarUDFImpl};
643
644 #[test]
645 fn date_trunc_test() {
646 let cases = vec![
647 (
648 "2020-09-08T13:42:29.190855Z",
649 "second",
650 "2020-09-08T13:42:29.000000Z",
651 ),
652 (
653 "2020-09-08T13:42:29.190855Z",
654 "minute",
655 "2020-09-08T13:42:00.000000Z",
656 ),
657 (
658 "2020-09-08T13:42:29.190855Z",
659 "hour",
660 "2020-09-08T13:00:00.000000Z",
661 ),
662 (
663 "2020-09-08T13:42:29.190855Z",
664 "day",
665 "2020-09-08T00:00:00.000000Z",
666 ),
667 (
668 "2020-09-08T13:42:29.190855Z",
669 "week",
670 "2020-09-07T00:00:00.000000Z",
671 ),
672 (
673 "2020-09-08T13:42:29.190855Z",
674 "month",
675 "2020-09-01T00:00:00.000000Z",
676 ),
677 (
678 "2020-09-08T13:42:29.190855Z",
679 "year",
680 "2020-01-01T00:00:00.000000Z",
681 ),
682 (
684 "2021-01-01T13:42:29.190855Z",
685 "week",
686 "2020-12-28T00:00:00.000000Z",
687 ),
688 (
689 "2020-01-01T13:42:29.190855Z",
690 "week",
691 "2019-12-30T00:00:00.000000Z",
692 ),
693 (
695 "2020-01-01T13:42:29.190855Z",
696 "quarter",
697 "2020-01-01T00:00:00.000000Z",
698 ),
699 (
700 "2020-02-01T13:42:29.190855Z",
701 "quarter",
702 "2020-01-01T00:00:00.000000Z",
703 ),
704 (
705 "2020-03-01T13:42:29.190855Z",
706 "quarter",
707 "2020-01-01T00:00:00.000000Z",
708 ),
709 (
710 "2020-04-01T13:42:29.190855Z",
711 "quarter",
712 "2020-04-01T00:00:00.000000Z",
713 ),
714 (
715 "2020-08-01T13:42:29.190855Z",
716 "quarter",
717 "2020-07-01T00:00:00.000000Z",
718 ),
719 (
720 "2020-11-01T13:42:29.190855Z",
721 "quarter",
722 "2020-10-01T00:00:00.000000Z",
723 ),
724 (
725 "2020-12-01T13:42:29.190855Z",
726 "quarter",
727 "2020-10-01T00:00:00.000000Z",
728 ),
729 ];
730
731 cases.iter().for_each(|(original, granularity, expected)| {
732 let left = string_to_timestamp_nanos(original).unwrap();
733 let right = string_to_timestamp_nanos(expected).unwrap();
734 let granularity_enum = DateTruncGranularity::from_str(granularity).unwrap();
735 let result = date_trunc_coarse(granularity_enum, left, None).unwrap();
736 assert_eq!(result, right, "{original} = {expected}");
737 });
738 }
739
740 #[test]
741 fn test_date_trunc_timezones() {
742 let cases = [
743 (
744 vec![
745 "2020-09-08T00:00:00Z",
746 "2020-09-08T01:00:00Z",
747 "2020-09-08T02:00:00Z",
748 "2020-09-08T03:00:00Z",
749 "2020-09-08T04:00:00Z",
750 ],
751 Some("+00".into()),
752 vec![
753 "2020-09-08T00:00:00Z",
754 "2020-09-08T00:00:00Z",
755 "2020-09-08T00:00:00Z",
756 "2020-09-08T00:00:00Z",
757 "2020-09-08T00:00:00Z",
758 ],
759 ),
760 (
761 vec![
762 "2020-09-08T00:00:00Z",
763 "2020-09-08T01:00:00Z",
764 "2020-09-08T02:00:00Z",
765 "2020-09-08T03:00:00Z",
766 "2020-09-08T04:00:00Z",
767 ],
768 None,
769 vec![
770 "2020-09-08T00:00:00Z",
771 "2020-09-08T00:00:00Z",
772 "2020-09-08T00:00:00Z",
773 "2020-09-08T00:00:00Z",
774 "2020-09-08T00:00:00Z",
775 ],
776 ),
777 (
778 vec![
779 "2020-09-08T00:00:00Z",
780 "2020-09-08T01:00:00Z",
781 "2020-09-08T02:00:00Z",
782 "2020-09-08T03:00:00Z",
783 "2020-09-08T04:00:00Z",
784 ],
785 Some("-02".into()),
786 vec![
787 "2020-09-07T02:00:00Z",
788 "2020-09-07T02:00:00Z",
789 "2020-09-08T02:00:00Z",
790 "2020-09-08T02:00:00Z",
791 "2020-09-08T02:00:00Z",
792 ],
793 ),
794 (
795 vec![
796 "2020-09-08T00:00:00+05",
797 "2020-09-08T01:00:00+05",
798 "2020-09-08T02:00:00+05",
799 "2020-09-08T03:00:00+05",
800 "2020-09-08T04:00:00+05",
801 ],
802 Some("+05".into()),
803 vec![
804 "2020-09-08T00:00:00+05",
805 "2020-09-08T00:00:00+05",
806 "2020-09-08T00:00:00+05",
807 "2020-09-08T00:00:00+05",
808 "2020-09-08T00:00:00+05",
809 ],
810 ),
811 (
812 vec![
813 "2020-09-08T00:00:00+08",
814 "2020-09-08T01:00:00+08",
815 "2020-09-08T02:00:00+08",
816 "2020-09-08T03:00:00+08",
817 "2020-09-08T04:00:00+08",
818 ],
819 Some("+08".into()),
820 vec![
821 "2020-09-08T00:00:00+08",
822 "2020-09-08T00:00:00+08",
823 "2020-09-08T00:00:00+08",
824 "2020-09-08T00:00:00+08",
825 "2020-09-08T00:00:00+08",
826 ],
827 ),
828 (
829 vec![
830 "2024-10-26T23:00:00Z",
831 "2024-10-27T00:00:00Z",
832 "2024-10-27T01:00:00Z",
833 "2024-10-27T02:00:00Z",
834 ],
835 Some("Europe/Berlin".into()),
836 vec![
837 "2024-10-27T00:00:00+02",
838 "2024-10-27T00:00:00+02",
839 "2024-10-27T00:00:00+02",
840 "2024-10-27T00:00:00+02",
841 ],
842 ),
843 (
844 vec![
845 "2018-02-18T00:00:00Z",
846 "2018-02-18T01:00:00Z",
847 "2018-02-18T02:00:00Z",
848 "2018-02-18T03:00:00Z",
849 "2018-11-04T01:00:00Z",
850 "2018-11-04T02:00:00Z",
851 "2018-11-04T03:00:00Z",
852 "2018-11-04T04:00:00Z",
853 ],
854 Some("America/Sao_Paulo".into()),
855 vec![
856 "2018-02-17T00:00:00-02",
857 "2018-02-17T00:00:00-02",
858 "2018-02-17T00:00:00-02",
859 "2018-02-18T00:00:00-03",
860 "2018-11-03T00:00:00-03",
861 "2018-11-03T00:00:00-03",
862 "2018-11-04T01:00:00-02",
863 "2018-11-04T01:00:00-02",
864 ],
865 ),
866 ];
867
868 cases.iter().for_each(|(original, tz_opt, expected)| {
869 let input = original
870 .iter()
871 .map(|s| Some(string_to_timestamp_nanos(s).unwrap()))
872 .collect::<TimestampNanosecondArray>()
873 .with_timezone_opt(tz_opt.clone());
874 let right = expected
875 .iter()
876 .map(|s| Some(string_to_timestamp_nanos(s).unwrap()))
877 .collect::<TimestampNanosecondArray>()
878 .with_timezone_opt(tz_opt.clone());
879 let batch_len = input.len();
880 let arg_fields = vec![
881 Field::new("a", DataType::Utf8, false).into(),
882 Field::new("b", input.data_type().clone(), false).into(),
883 ];
884 let args = datafusion_expr::ScalarFunctionArgs {
885 args: vec![
886 ColumnarValue::Scalar(ScalarValue::from("day")),
887 ColumnarValue::Array(Arc::new(input)),
888 ],
889 arg_fields,
890 number_rows: batch_len,
891 return_field: Field::new(
892 "f",
893 DataType::Timestamp(TimeUnit::Nanosecond, tz_opt.clone()),
894 true,
895 )
896 .into(),
897 config_options: Arc::new(ConfigOptions::default()),
898 };
899 let result = DateTruncFunc::new().invoke_with_args(args).unwrap();
900 if let ColumnarValue::Array(result) = result {
901 assert_eq!(
902 result.data_type(),
903 &DataType::Timestamp(TimeUnit::Nanosecond, tz_opt.clone())
904 );
905 let left = as_primitive_array::<TimestampNanosecondType>(&result);
906 assert_eq!(left, &right);
907 } else {
908 panic!("unexpected column type");
909 }
910 });
911 }
912
913 #[test]
914 fn test_date_trunc_hour_timezones() {
915 let cases = [
916 (
917 vec![
918 "2020-09-08T00:30:00Z",
919 "2020-09-08T01:30:00Z",
920 "2020-09-08T02:30:00Z",
921 "2020-09-08T03:30:00Z",
922 "2020-09-08T04:30:00Z",
923 ],
924 Some("+00".into()),
925 vec![
926 "2020-09-08T00:00:00Z",
927 "2020-09-08T01:00:00Z",
928 "2020-09-08T02:00:00Z",
929 "2020-09-08T03:00:00Z",
930 "2020-09-08T04:00:00Z",
931 ],
932 ),
933 (
934 vec![
935 "2020-09-08T00:30:00Z",
936 "2020-09-08T01:30:00Z",
937 "2020-09-08T02:30:00Z",
938 "2020-09-08T03:30:00Z",
939 "2020-09-08T04:30:00Z",
940 ],
941 None,
942 vec![
943 "2020-09-08T00:00:00Z",
944 "2020-09-08T01:00:00Z",
945 "2020-09-08T02:00:00Z",
946 "2020-09-08T03:00:00Z",
947 "2020-09-08T04:00:00Z",
948 ],
949 ),
950 (
951 vec![
952 "2020-09-08T00:30:00Z",
953 "2020-09-08T01:30:00Z",
954 "2020-09-08T02:30:00Z",
955 "2020-09-08T03:30:00Z",
956 "2020-09-08T04:30:00Z",
957 ],
958 Some("-02".into()),
959 vec![
960 "2020-09-08T00:00:00Z",
961 "2020-09-08T01:00:00Z",
962 "2020-09-08T02:00:00Z",
963 "2020-09-08T03:00:00Z",
964 "2020-09-08T04:00:00Z",
965 ],
966 ),
967 (
968 vec![
969 "2020-09-08T00:30:00+05",
970 "2020-09-08T01:30:00+05",
971 "2020-09-08T02:30:00+05",
972 "2020-09-08T03:30:00+05",
973 "2020-09-08T04:30:00+05",
974 ],
975 Some("+05".into()),
976 vec![
977 "2020-09-08T00:00:00+05",
978 "2020-09-08T01:00:00+05",
979 "2020-09-08T02:00:00+05",
980 "2020-09-08T03:00:00+05",
981 "2020-09-08T04:00:00+05",
982 ],
983 ),
984 (
985 vec![
986 "2020-09-08T00:30:00+08",
987 "2020-09-08T01:30:00+08",
988 "2020-09-08T02:30:00+08",
989 "2020-09-08T03:30:00+08",
990 "2020-09-08T04:30:00+08",
991 ],
992 Some("+08".into()),
993 vec![
994 "2020-09-08T00:00:00+08",
995 "2020-09-08T01:00:00+08",
996 "2020-09-08T02:00:00+08",
997 "2020-09-08T03:00:00+08",
998 "2020-09-08T04:00:00+08",
999 ],
1000 ),
1001 (
1002 vec![
1003 "2024-10-26T23:30:00Z",
1004 "2024-10-27T00:30:00Z",
1005 "2024-10-27T01:30:00Z",
1006 "2024-10-27T02:30:00Z",
1007 ],
1008 Some("Europe/Berlin".into()),
1009 vec![
1010 "2024-10-27T01:00:00+02",
1011 "2024-10-27T02:00:00+02",
1012 "2024-10-27T02:00:00+01",
1013 "2024-10-27T03:00:00+01",
1014 ],
1015 ),
1016 (
1017 vec![
1018 "2018-02-18T00:30:00Z",
1019 "2018-02-18T01:30:00Z",
1020 "2018-02-18T02:30:00Z",
1021 "2018-02-18T03:30:00Z",
1022 "2018-11-04T01:00:00Z",
1023 "2018-11-04T02:00:00Z",
1024 "2018-11-04T03:00:00Z",
1025 "2018-11-04T04:00:00Z",
1026 ],
1027 Some("America/Sao_Paulo".into()),
1028 vec![
1029 "2018-02-17T22:00:00-02",
1030 "2018-02-17T23:00:00-02",
1031 "2018-02-17T23:00:00-03",
1032 "2018-02-18T00:00:00-03",
1033 "2018-11-03T22:00:00-03",
1034 "2018-11-03T23:00:00-03",
1035 "2018-11-04T01:00:00-02",
1036 "2018-11-04T02:00:00-02",
1037 ],
1038 ),
1039 (
1040 vec![
1041 "2024-10-26T23:30:00Z",
1042 "2024-10-27T00:30:00Z",
1043 "2024-10-27T01:30:00Z",
1044 "2024-10-27T02:30:00Z",
1045 ],
1046 Some("Asia/Kathmandu".into()), vec![
1048 "2024-10-27T05:00:00+05:45",
1049 "2024-10-27T06:00:00+05:45",
1050 "2024-10-27T07:00:00+05:45",
1051 "2024-10-27T08:00:00+05:45",
1052 ],
1053 ),
1054 ];
1055
1056 cases.iter().for_each(|(original, tz_opt, expected)| {
1057 let input = original
1058 .iter()
1059 .map(|s| Some(string_to_timestamp_nanos(s).unwrap()))
1060 .collect::<TimestampNanosecondArray>()
1061 .with_timezone_opt(tz_opt.clone());
1062 let right = expected
1063 .iter()
1064 .map(|s| Some(string_to_timestamp_nanos(s).unwrap()))
1065 .collect::<TimestampNanosecondArray>()
1066 .with_timezone_opt(tz_opt.clone());
1067 let batch_len = input.len();
1068 let arg_fields = vec![
1069 Field::new("a", DataType::Utf8, false).into(),
1070 Field::new("b", input.data_type().clone(), false).into(),
1071 ];
1072 let args = datafusion_expr::ScalarFunctionArgs {
1073 args: vec![
1074 ColumnarValue::Scalar(ScalarValue::from("hour")),
1075 ColumnarValue::Array(Arc::new(input)),
1076 ],
1077 arg_fields,
1078 number_rows: batch_len,
1079 return_field: Field::new(
1080 "f",
1081 DataType::Timestamp(TimeUnit::Nanosecond, tz_opt.clone()),
1082 true,
1083 )
1084 .into(),
1085 config_options: Arc::new(ConfigOptions::default()),
1086 };
1087 let result = DateTruncFunc::new().invoke_with_args(args).unwrap();
1088 if let ColumnarValue::Array(result) = result {
1089 assert_eq!(
1090 result.data_type(),
1091 &DataType::Timestamp(TimeUnit::Nanosecond, tz_opt.clone())
1092 );
1093 let left = as_primitive_array::<TimestampNanosecondType>(&result);
1094 assert_eq!(left, &right);
1095 } else {
1096 panic!("unexpected column type");
1097 }
1098 });
1099 }
1100
1101 #[test]
1102 fn test_date_trunc_fine_granularity_timezones() {
1103 let cases = [
1104 (
1106 vec![
1107 "2020-09-08T13:42:29.190855Z",
1108 "2020-09-08T13:42:30.500000Z",
1109 "2020-09-08T13:42:31.999999Z",
1110 ],
1111 Some("+00".into()),
1112 "second",
1113 vec![
1114 "2020-09-08T13:42:29.000000Z",
1115 "2020-09-08T13:42:30.000000Z",
1116 "2020-09-08T13:42:31.000000Z",
1117 ],
1118 ),
1119 (
1120 vec![
1121 "2020-09-08T13:42:29.190855+05",
1122 "2020-09-08T13:42:30.500000+05",
1123 "2020-09-08T13:42:31.999999+05",
1124 ],
1125 Some("+05".into()),
1126 "second",
1127 vec![
1128 "2020-09-08T13:42:29.000000+05",
1129 "2020-09-08T13:42:30.000000+05",
1130 "2020-09-08T13:42:31.000000+05",
1131 ],
1132 ),
1133 (
1134 vec![
1135 "2020-09-08T13:42:29.190855Z",
1136 "2020-09-08T13:42:30.500000Z",
1137 "2020-09-08T13:42:31.999999Z",
1138 ],
1139 Some("Europe/Berlin".into()),
1140 "second",
1141 vec![
1142 "2020-09-08T13:42:29.000000Z",
1143 "2020-09-08T13:42:30.000000Z",
1144 "2020-09-08T13:42:31.000000Z",
1145 ],
1146 ),
1147 (
1149 vec![
1150 "2020-09-08T13:42:29.190855Z",
1151 "2020-09-08T13:43:30.500000Z",
1152 "2020-09-08T13:44:31.999999Z",
1153 ],
1154 Some("+00".into()),
1155 "minute",
1156 vec![
1157 "2020-09-08T13:42:00.000000Z",
1158 "2020-09-08T13:43:00.000000Z",
1159 "2020-09-08T13:44:00.000000Z",
1160 ],
1161 ),
1162 (
1163 vec![
1164 "2020-09-08T13:42:29.190855+08",
1165 "2020-09-08T13:43:30.500000+08",
1166 "2020-09-08T13:44:31.999999+08",
1167 ],
1168 Some("+08".into()),
1169 "minute",
1170 vec![
1171 "2020-09-08T13:42:00.000000+08",
1172 "2020-09-08T13:43:00.000000+08",
1173 "2020-09-08T13:44:00.000000+08",
1174 ],
1175 ),
1176 (
1177 vec![
1178 "2020-09-08T13:42:29.190855Z",
1179 "2020-09-08T13:43:30.500000Z",
1180 "2020-09-08T13:44:31.999999Z",
1181 ],
1182 Some("America/Sao_Paulo".into()),
1183 "minute",
1184 vec![
1185 "2020-09-08T13:42:00.000000Z",
1186 "2020-09-08T13:43:00.000000Z",
1187 "2020-09-08T13:44:00.000000Z",
1188 ],
1189 ),
1190 (
1192 vec![
1193 "2020-09-08T13:42:29.190855Z",
1194 "2020-09-08T13:43:30.500000Z",
1195 "2020-09-08T13:44:31.999999Z",
1196 ],
1197 None,
1198 "minute",
1199 vec![
1200 "2020-09-08T13:42:00.000000Z",
1201 "2020-09-08T13:43:00.000000Z",
1202 "2020-09-08T13:44:00.000000Z",
1203 ],
1204 ),
1205 (
1207 vec![
1208 "2020-09-08T13:42:29.190855Z",
1209 "2020-09-08T13:42:29.191999Z",
1210 "2020-09-08T13:42:29.192500Z",
1211 ],
1212 Some("Asia/Kolkata".into()),
1213 "millisecond",
1214 vec![
1215 "2020-09-08T19:12:29.190000+05:30",
1216 "2020-09-08T19:12:29.191000+05:30",
1217 "2020-09-08T19:12:29.192000+05:30",
1218 ],
1219 ),
1220 ];
1221
1222 cases
1223 .iter()
1224 .for_each(|(original, tz_opt, granularity, expected)| {
1225 let input = original
1226 .iter()
1227 .map(|s| Some(string_to_timestamp_nanos(s).unwrap()))
1228 .collect::<TimestampNanosecondArray>()
1229 .with_timezone_opt(tz_opt.clone());
1230 let right = expected
1231 .iter()
1232 .map(|s| Some(string_to_timestamp_nanos(s).unwrap()))
1233 .collect::<TimestampNanosecondArray>()
1234 .with_timezone_opt(tz_opt.clone());
1235 let batch_len = input.len();
1236 let arg_fields = vec![
1237 Field::new("a", DataType::Utf8, false).into(),
1238 Field::new("b", input.data_type().clone(), false).into(),
1239 ];
1240 let args = datafusion_expr::ScalarFunctionArgs {
1241 args: vec![
1242 ColumnarValue::Scalar(ScalarValue::from(*granularity)),
1243 ColumnarValue::Array(Arc::new(input)),
1244 ],
1245 arg_fields,
1246 number_rows: batch_len,
1247 return_field: Field::new(
1248 "f",
1249 DataType::Timestamp(TimeUnit::Nanosecond, tz_opt.clone()),
1250 true,
1251 )
1252 .into(),
1253 config_options: Arc::new(ConfigOptions::default()),
1254 };
1255 let result = DateTruncFunc::new().invoke_with_args(args).unwrap();
1256 if let ColumnarValue::Array(result) = result {
1257 assert_eq!(
1258 result.data_type(),
1259 &DataType::Timestamp(TimeUnit::Nanosecond, tz_opt.clone()),
1260 "Failed for granularity: {granularity}, timezone: {tz_opt:?}"
1261 );
1262 let left = as_primitive_array::<TimestampNanosecondType>(&result);
1263 assert_eq!(
1264 left, &right,
1265 "Failed for granularity: {granularity}, timezone: {tz_opt:?}"
1266 );
1267 } else {
1268 panic!("unexpected column type");
1269 }
1270 });
1271 }
1272}