datafusion_functions/datetime/
to_time.rs1use crate::datetime::common::*;
19use arrow::array::builder::PrimitiveBuilder;
20use arrow::array::cast::AsArray;
21use arrow::array::temporal_conversions::time_to_time64ns;
22use arrow::array::types::Time64NanosecondType;
23use arrow::array::{Array, PrimitiveArray, StringArrayType};
24use arrow::datatypes::DataType;
25use arrow::datatypes::DataType::*;
26use chrono::NaiveTime;
27use datafusion_common::{Result, ScalarValue, exec_err};
28use datafusion_expr::{
29 ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
30};
31use datafusion_macros::user_doc;
32use std::any::Any;
33use std::sync::Arc;
34
/// Chrono format strings tried, in this order, when `to_time` is called
/// without any explicit format arguments.
const DEFAULT_TIME_FORMATS: &[&str] = &[
    // e.g. `12:30:45.123456789`
    "%H:%M:%S%.f",
    // e.g. `12:30:45`
    "%H:%M:%S",
    // e.g. `12:30`
    "%H:%M",
];
41
42#[user_doc(
43 doc_section(label = "Time and Date Functions"),
44 description = r"Converts a value to a time (`HH:MM:SS.nnnnnnnnn`).
45Supports strings and timestamps as input.
46Strings are parsed as `HH:MM:SS`, `HH:MM:SS.nnnnnnnnn`, or `HH:MM` if no [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)s are provided.
47Timestamps will have the time portion extracted.
48Returns the corresponding time.
49
50Note: `to_time` returns Time64(Nanosecond), which represents the time of day in nanoseconds since midnight.",
51 syntax_example = "to_time('12:30:45', '%H:%M:%S')",
52 sql_example = r#"```sql
53> select to_time('12:30:45');
54+---------------------------+
55| to_time(Utf8("12:30:45")) |
56+---------------------------+
57| 12:30:45 |
58+---------------------------+
59> select to_time('12-30-45', '%H-%M-%S');
60+--------------------------------------------+
61| to_time(Utf8("12-30-45"),Utf8("%H-%M-%S")) |
62+--------------------------------------------+
63| 12:30:45 |
64+--------------------------------------------+
65> select to_time('2024-01-15 14:30:45'::timestamp);
66+--------------------------------------------------+
67| to_time(Utf8("2024-01-15 14:30:45")) |
68+--------------------------------------------------+
69| 14:30:45 |
70+--------------------------------------------------+
71```
72
73Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/builtin_functions/date_time.rs)
74"#,
75 standard_argument(name = "expression", prefix = "String or Timestamp"),
76 argument(
77 name = "format_n",
78 description = r"Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order
79 they appear with the first successful one being returned. If none of the formats successfully parse the expression
80 an error will be returned."
81 )
82)]
/// Scalar UDF registered under the name `to_time`.
///
/// It produces `Time64(Nanosecond)` values from string or timestamp input;
/// the user-facing documentation is supplied by the `user_doc` attribute on
/// this struct.
#[derive(Debug, PartialEq, Eq, Hash)]
pub struct ToTimeFunc {
    // Built with `Signature::variadic_any` in `new`; argument types are
    // checked when the function is invoked rather than by the signature.
    signature: Signature,
}
87
88impl Default for ToTimeFunc {
89 fn default() -> Self {
90 Self::new()
91 }
92}
93
94impl ToTimeFunc {
95 pub fn new() -> Self {
96 Self {
97 signature: Signature::variadic_any(Volatility::Immutable),
98 }
99 }
100}
101
102impl ScalarUDFImpl for ToTimeFunc {
103 fn as_any(&self) -> &dyn Any {
104 self
105 }
106
107 fn name(&self) -> &str {
108 "to_time"
109 }
110
111 fn signature(&self) -> &Signature {
112 &self.signature
113 }
114
115 fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
116 Ok(Time64(arrow::datatypes::TimeUnit::Nanosecond))
117 }
118
119 fn invoke_with_args(
120 &self,
121 args: datafusion_expr::ScalarFunctionArgs,
122 ) -> Result<ColumnarValue> {
123 let args = args.args;
124 if args.is_empty() {
125 return exec_err!("to_time function requires 1 or more arguments, got 0");
126 }
127
128 if args.len() > 1 {
130 validate_data_types(&args, "to_time")?;
131 }
132
133 match args[0].data_type() {
134 Utf8View | LargeUtf8 | Utf8 => string_to_time(&args),
135 Null => Ok(ColumnarValue::Scalar(ScalarValue::Time64Nanosecond(None))),
136 Timestamp(_, _) => timestamp_to_time(&args[0]),
138 other => {
139 exec_err!("Unsupported data type {} for function to_time", other)
140 }
141 }
142 }
143
144 fn documentation(&self) -> Option<&Documentation> {
145 self.doc()
146 }
147}
148
149fn string_to_time(args: &[ColumnarValue]) -> Result<ColumnarValue> {
151 let formats = collect_formats(args)?;
152
153 match &args[0] {
154 ColumnarValue::Scalar(ScalarValue::Utf8(s))
155 | ColumnarValue::Scalar(ScalarValue::LargeUtf8(s))
156 | ColumnarValue::Scalar(ScalarValue::Utf8View(s)) => {
157 let result = s
158 .as_ref()
159 .map(|s| parse_time_with_formats(s, &formats))
160 .transpose()?;
161 Ok(ColumnarValue::Scalar(ScalarValue::Time64Nanosecond(result)))
162 }
163 ColumnarValue::Array(array) => {
164 let result = match array.data_type() {
165 Utf8 => parse_time_array(&array.as_string::<i32>(), &formats)?,
166 LargeUtf8 => parse_time_array(&array.as_string::<i64>(), &formats)?,
167 Utf8View => parse_time_array(&array.as_string_view(), &formats)?,
168 other => return exec_err!("Unsupported type for to_time: {other}"),
169 };
170 Ok(ColumnarValue::Array(Arc::new(result)))
171 }
172 other => exec_err!("Unsupported argument for to_time: {other:?}"),
173 }
174}
175
176fn collect_formats(args: &[ColumnarValue]) -> Result<Vec<&str>> {
178 if args.len() <= 1 {
179 return Ok(DEFAULT_TIME_FORMATS.to_vec());
180 }
181
182 let mut formats = Vec::with_capacity(args.len() - 1);
183 for (i, arg) in args[1..].iter().enumerate() {
184 match arg {
185 ColumnarValue::Scalar(ScalarValue::Utf8(Some(s)))
186 | ColumnarValue::Scalar(ScalarValue::LargeUtf8(Some(s)))
187 | ColumnarValue::Scalar(ScalarValue::Utf8View(Some(s))) => {
188 formats.push(s.as_str());
189 }
190 ColumnarValue::Scalar(ScalarValue::Utf8(None))
191 | ColumnarValue::Scalar(ScalarValue::LargeUtf8(None))
192 | ColumnarValue::Scalar(ScalarValue::Utf8View(None)) => {
193 }
195 ColumnarValue::Array(_) => {
196 return exec_err!(
197 "to_time format argument {} must be a scalar, not an array",
198 i + 2 );
200 }
201 other => {
202 return exec_err!(
203 "to_time format argument {} has unsupported type: {:?}",
204 i + 2,
205 other.data_type()
206 );
207 }
208 }
209 }
210 Ok(formats)
211}
212
213fn timestamp_to_time(arg: &ColumnarValue) -> Result<ColumnarValue> {
215 arg.cast_to(&Time64(arrow::datatypes::TimeUnit::Nanosecond), None)
216}
217
218fn parse_time_array<'a, A: StringArrayType<'a>>(
220 array: &A,
221 formats: &[&str],
222) -> Result<PrimitiveArray<Time64NanosecondType>> {
223 let mut builder: PrimitiveBuilder<Time64NanosecondType> =
224 PrimitiveArray::builder(array.len());
225
226 for i in 0..array.len() {
227 if array.is_null(i) {
228 builder.append_null();
229 } else {
230 let s = array.value(i);
231 let nanos = parse_time_with_formats(s, formats)?;
232 builder.append_value(nanos);
233 }
234 }
235
236 Ok(builder.finish())
237}
238
239fn parse_time_with_formats(s: &str, formats: &[&str]) -> Result<i64> {
241 for format in formats {
242 if let Ok(time) = NaiveTime::parse_from_str(s, format) {
243 return Ok(time_to_time64ns(time));
245 }
246 }
247 exec_err!(
248 "Error parsing '{}' as time. Tried formats: {:?}",
249 s,
250 formats
251 )
252}