datafusion_functions/datetime/mod.rs
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! date & time DataFusion functions
19
20use std::sync::Arc;
21
22use datafusion_expr::ScalarUDF;
23
24pub mod common;
25pub mod current_date;
26pub mod current_time;
27pub mod date_bin;
28pub mod date_part;
29pub mod date_trunc;
30pub mod from_unixtime;
31pub mod make_date;
32pub mod make_time;
33pub mod now;
34pub mod planner;
35pub mod to_char;
36pub mod to_date;
37pub mod to_local_time;
38pub mod to_time;
39pub mod to_timestamp;
40pub mod to_unixtime;
41
42// create UDFs
43make_udf_function!(current_date::CurrentDateFunc, current_date);
44make_udf_function!(current_time::CurrentTimeFunc, current_time);
45make_udf_function!(date_bin::DateBinFunc, date_bin);
46make_udf_function!(date_part::DatePartFunc, date_part);
47make_udf_function!(date_trunc::DateTruncFunc, date_trunc);
48make_udf_function!(make_date::MakeDateFunc, make_date);
49make_udf_function!(make_time::MakeTimeFunc, make_time);
50make_udf_function!(from_unixtime::FromUnixtimeFunc, from_unixtime);
51make_udf_function!(to_char::ToCharFunc, to_char);
52make_udf_function!(to_date::ToDateFunc, to_date);
53make_udf_function!(to_local_time::ToLocalTimeFunc, to_local_time);
54make_udf_function!(to_time::ToTimeFunc, to_time);
55make_udf_function!(to_unixtime::ToUnixtimeFunc, to_unixtime);
56make_udf_function_with_config!(to_timestamp::ToTimestampFunc, to_timestamp);
57make_udf_function_with_config!(
58 to_timestamp::ToTimestampSecondsFunc,
59 to_timestamp_seconds
60);
61make_udf_function_with_config!(to_timestamp::ToTimestampMillisFunc, to_timestamp_millis);
62make_udf_function_with_config!(to_timestamp::ToTimestampMicrosFunc, to_timestamp_micros);
63make_udf_function_with_config!(to_timestamp::ToTimestampNanosFunc, to_timestamp_nanos);
64
65// create UDF with config
66make_udf_function_with_config!(now::NowFunc, now);
67
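// we cannot currently use the export_functions macro since it doesn't handle
// functions with varargs
// NOTE(review): `export_functions!` is invoked just below, including entries
// that take `args`, so this comment may be stale -- TODO confirm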
70
71pub mod expr_fn {
72 use datafusion_expr::Expr;
73
74 export_functions!((
75 current_date,
76 "returns current UTC date as a Date32 value",
77 ),(
78 current_time,
79 "returns current UTC time as a Time64 value",
80 ),(
81 from_unixtime,
82 "converts an integer to RFC3339 timestamp format string",
83 unixtime
84 ),(
85 date_bin,
86 "coerces an arbitrary timestamp to the start of the nearest specified interval",
87 stride source origin
88 ),(
89 date_part,
90 "extracts a subfield from the date",
91 part date
92 ),(
93 date_trunc,
94 "truncates the date to a specified level of precision",
95 part date
96 ),(
97 make_date,
98 "make a date from year, month and day component parts",
99 year month day
100 ),(
101 make_time,
102 "make a time from hour, minute and second component parts",
103 hour minute second
104 ),(
105 now,
106 "returns the current timestamp in nanoseconds, using the same value for all instances of now() in same statement",
107 @config
108 ),
109 (
110 to_local_time,
111 "converts a timezone-aware timestamp to local time (with no offset or timezone information), i.e. strips off the timezone from the timestamp",
112 args,
113 ),
114 (
115 to_unixtime,
116 "converts a value to seconds since the unix epoch",
117 args,
118 ),(
119 to_time,
120 "converts a string and optional formats to a `Time64(Nanoseconds)`",
121 args,
122 ),(
123 to_timestamp,
124 "converts a string and optional formats to a `Timestamp(Nanoseconds, TimeZone)`",
125 @config args,
126 ),(
127 to_timestamp_seconds,
128 "converts a string and optional formats to a `Timestamp(Seconds, TimeZone)`",
129 @config args,
130 ),(
131 to_timestamp_millis,
132 "converts a string and optional formats to a `Timestamp(Milliseconds, TimeZone)`",
133 @config args,
134 ),(
135 to_timestamp_micros,
136 "converts a string and optional formats to a `Timestamp(Microseconds, TimeZone)`",
137 @config args,
138 ),(
139 to_timestamp_nanos,
140 "converts a string and optional formats to a `Timestamp(Nanoseconds, TimeZone)`",
141 @config args,
142 ));
143
144 /// Returns a string representation of a date, time, timestamp or duration based
145 /// on a Chrono pattern.
146 ///
147 /// The syntax for the patterns can be found at
148 /// <https://docs.rs/chrono/latest/chrono/format/strftime/index.html>
149 ///
150 /// # Examples
151 ///
152 /// ```ignore
153 /// # use chrono::prelude::*;
154 /// # use datafusion::prelude::*;
155 /// # use datafusion::error::Result;
156 /// # use datafusion_common::ScalarValue::TimestampNanosecond;
157 /// # use std::sync::Arc;
158 /// # use arrow::array::{Date32Array, RecordBatch, StringArray};
159 /// # use arrow::datatypes::{DataType, Field, Schema};
160 /// # #[tokio::main]
161 /// # async fn main() -> Result<()> {
162 /// let schema = Arc::new(Schema::new(vec![
163 /// Field::new("values", DataType::Date32, false),
164 /// Field::new("patterns", DataType::Utf8, false),
165 /// ]));
166 ///
167 /// let batch = RecordBatch::try_new(
168 /// schema,
169 /// vec![
170 /// Arc::new(Date32Array::from(vec![
171 /// 18506,
172 /// 18507,
173 /// 18508,
174 /// 18509,
175 /// ])),
176 /// Arc::new(StringArray::from(vec![
177 /// "%Y-%m-%d",
178 /// "%Y:%m:%d",
179 /// "%Y%m%d",
180 /// "%d-%m-%Y",
181 /// ])),
182 /// ],
183 /// )?;
184 ///
185 /// let ctx = SessionContext::new();
186 /// ctx.register_batch("t", batch)?;
187 /// let df = ctx.table("t").await?;
188 ///
189 /// // use the to_char function to convert col 'values',
190 /// // to strings using patterns in col 'patterns'
191 /// let df = df.with_column(
192 /// "date_str",
193 /// to_char(col("values"), col("patterns"))
194 /// )?;
195 /// // Note that providing a scalar value for the pattern
196 /// // is more performant
197 /// let df = df.with_column(
198 /// "date_str2",
199 /// to_char(col("values"), lit("%d-%m-%Y"))
200 /// )?;
201 /// // literals can be used as well with dataframe calls
202 /// let timestamp = "2026-07-08T09:10:11"
203 /// .parse::<NaiveDateTime>()
204 /// .unwrap()
205 /// .with_nanosecond(56789)
206 /// .unwrap()
207 /// .timestamp_nanos_opt()
208 /// .unwrap();
209 /// let df = df.with_column(
210 /// "timestamp_str",
211 /// to_char(lit(TimestampNanosecond(Some(timestamp), None)), lit("%d-%m-%Y %H:%M:%S"))
212 /// )?;
213 ///
214 /// df.show().await?;
215 ///
216 /// # Ok(())
217 /// # }
218 /// ```
219 pub fn to_char(datetime: Expr, format: Expr) -> Expr {
220 super::to_char().call(vec![datetime, format])
221 }
222
223 /// ```ignore
224 /// # use std::sync::Arc;
225 ///
226 /// # use datafusion_common::Result;
227 ///
228 /// # #[tokio::main]
229 /// # async fn main() -> Result<()> {
230 /// # use arrow::array::StringArray;
231 /// # use arrow::datatypes::{DataType, Field, Schema};
232 /// # use arrow::record_batch::RecordBatch;
233 /// # use datafusion_expr::col;
234 /// # use datafusion::prelude::*;
235 /// # use datafusion_functions::expr_fn::to_date;
236 ///
237 /// // define a schema.
238 /// let schema = Arc::new(Schema::new(vec![Field::new("a", DataType::Utf8, false)]));
239 ///
240 /// // define data.
241 /// let batch = RecordBatch::try_new(
242 /// schema,
243 /// vec![Arc::new(StringArray::from(vec![
244 /// "2020-09-08T13:42:29Z",
245 /// "2020-09-08T13:42:29.190855-05:00",
246 /// "2020-08-09 12:13:29",
247 /// "2020-01-02",
248 /// ]))],
249 /// )?;
250 ///
251 /// // declare a new context. In spark API, this corresponds to a new spark SQLsession
252 /// let ctx = SessionContext::new();
253 ///
254 /// // declare a table in memory. In spark API, this corresponds to createDataFrame(...).
255 /// ctx.register_batch("t", batch)?;
256 /// let df = ctx.table("t").await?;
257 ///
258 /// // use to_date function to convert col 'a' to timestamp type using the default parsing
259 /// let df = df.with_column("a", to_date(vec![col("a")]))?;
260 ///
261 /// let df = df.select_columns(&["a"])?;
262 ///
263 /// // print the results
264 /// df.show().await?;
265 ///
266 /// # Ok(())
267 /// # }
268 /// ```
269 pub fn to_date(args: Vec<Expr>) -> Expr {
270 super::to_date().call(args)
271 }
272}
273
274/// Returns all DataFusion functions defined in this package
275pub fn functions() -> Vec<Arc<ScalarUDF>> {
276 use datafusion_common::config::ConfigOptions;
277 let config = ConfigOptions::default();
278 vec![
279 current_date(),
280 current_time(),
281 date_bin(),
282 date_part(),
283 date_trunc(),
284 from_unixtime(),
285 make_date(),
286 make_time(),
287 now(&config),
288 to_char(),
289 to_date(),
290 to_local_time(),
291 to_time(),
292 to_unixtime(),
293 to_timestamp(&config),
294 to_timestamp_seconds(&config),
295 to_timestamp_millis(&config),
296 to_timestamp_micros(&config),
297 to_timestamp_nanos(&config),
298 ]
299}