datafusion_functions/datetime/
to_time.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::datetime::common::*;
19use arrow::array::builder::PrimitiveBuilder;
20use arrow::array::cast::AsArray;
21use arrow::array::temporal_conversions::time_to_time64ns;
22use arrow::array::types::Time64NanosecondType;
23use arrow::array::{Array, PrimitiveArray, StringArrayType};
24use arrow::datatypes::DataType;
25use arrow::datatypes::DataType::*;
26use chrono::NaiveTime;
27use datafusion_common::{Result, ScalarValue, exec_err};
28use datafusion_expr::{
29    ColumnarValue, Documentation, ScalarUDFImpl, Signature, Volatility,
30};
31use datafusion_macros::user_doc;
32use std::any::Any;
33use std::sync::Arc;
34
/// Chrono patterns attempted, in this order, when the caller supplies no
/// explicit format strings:
///
/// 1. `%H:%M:%S%.f` — e.g. `12:30:45.123456789`
/// 2. `%H:%M:%S` — e.g. `12:30:45`
/// 3. `%H:%M` — e.g. `12:30`
const DEFAULT_TIME_FORMATS: &[&str] = &["%H:%M:%S%.f", "%H:%M:%S", "%H:%M"];
41
/// Scalar UDF implementing the SQL function `to_time`.
///
/// The only state is the function [`Signature`]; the user-facing
/// documentation is supplied through the `user_doc` attribute below.
#[user_doc(
    doc_section(label = "Time and Date Functions"),
    description = r"Converts a value to a time (`HH:MM:SS.nnnnnnnnn`).
Supports strings and timestamps as input.
Strings are parsed as `HH:MM:SS`, `HH:MM:SS.nnnnnnnnn`, or `HH:MM` if no [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)s are provided.
Timestamps will have the time portion extracted.
Returns the corresponding time.

Note: `to_time` returns Time64(Nanosecond), which represents the time of day in nanoseconds since midnight.",
    syntax_example = "to_time('12:30:45', '%H:%M:%S')",
    sql_example = r#"```sql
> select to_time('12:30:45');
+---------------------------+
| to_time(Utf8("12:30:45")) |
+---------------------------+
| 12:30:45                  |
+---------------------------+
> select to_time('12-30-45', '%H-%M-%S');
+--------------------------------------------+
| to_time(Utf8("12-30-45"),Utf8("%H-%M-%S")) |
+--------------------------------------------+
| 12:30:45                                   |
+--------------------------------------------+
> select to_time('2024-01-15 14:30:45'::timestamp);
+--------------------------------------------------+
| to_time(Utf8("2024-01-15 14:30:45"))             |
+--------------------------------------------------+
| 14:30:45                                         |
+--------------------------------------------------+
```

Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/builtin_functions/date_time.rs)
"#,
    standard_argument(name = "expression", prefix = "String or Timestamp"),
    argument(
        name = "format_n",
        description = r"Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order
  they appear with the first successful one being returned. If none of the formats successfully parse the expression
  an error will be returned."
    )
)]
#[derive(Debug, PartialEq, Eq, Hash)]
pub struct ToTimeFunc {
    // Signature returned by `ScalarUDFImpl::signature`.
    signature: Signature,
}
87
88impl Default for ToTimeFunc {
89    fn default() -> Self {
90        Self::new()
91    }
92}
93
94impl ToTimeFunc {
95    pub fn new() -> Self {
96        Self {
97            signature: Signature::variadic_any(Volatility::Immutable),
98        }
99    }
100}
101
102impl ScalarUDFImpl for ToTimeFunc {
103    fn as_any(&self) -> &dyn Any {
104        self
105    }
106
107    fn name(&self) -> &str {
108        "to_time"
109    }
110
111    fn signature(&self) -> &Signature {
112        &self.signature
113    }
114
115    fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
116        Ok(Time64(arrow::datatypes::TimeUnit::Nanosecond))
117    }
118
119    fn invoke_with_args(
120        &self,
121        args: datafusion_expr::ScalarFunctionArgs,
122    ) -> Result<ColumnarValue> {
123        let args = args.args;
124        if args.is_empty() {
125            return exec_err!("to_time function requires 1 or more arguments, got 0");
126        }
127
128        // validate that any args after the first one are Utf8
129        if args.len() > 1 {
130            validate_data_types(&args, "to_time")?;
131        }
132
133        match args[0].data_type() {
134            Utf8View | LargeUtf8 | Utf8 => string_to_time(&args),
135            Null => Ok(ColumnarValue::Scalar(ScalarValue::Time64Nanosecond(None))),
136            // Support timestamp input by extracting time portion using Arrow cast
137            Timestamp(_, _) => timestamp_to_time(&args[0]),
138            other => {
139                exec_err!("Unsupported data type {} for function to_time", other)
140            }
141        }
142    }
143
144    fn documentation(&self) -> Option<&Documentation> {
145        self.doc()
146    }
147}
148
149/// Convert string arguments to time (standalone function, not a method on ToTimeFunc)
150fn string_to_time(args: &[ColumnarValue]) -> Result<ColumnarValue> {
151    let formats = collect_formats(args)?;
152
153    match &args[0] {
154        ColumnarValue::Scalar(ScalarValue::Utf8(s))
155        | ColumnarValue::Scalar(ScalarValue::LargeUtf8(s))
156        | ColumnarValue::Scalar(ScalarValue::Utf8View(s)) => {
157            let result = s
158                .as_ref()
159                .map(|s| parse_time_with_formats(s, &formats))
160                .transpose()?;
161            Ok(ColumnarValue::Scalar(ScalarValue::Time64Nanosecond(result)))
162        }
163        ColumnarValue::Array(array) => {
164            let result = match array.data_type() {
165                Utf8 => parse_time_array(&array.as_string::<i32>(), &formats)?,
166                LargeUtf8 => parse_time_array(&array.as_string::<i64>(), &formats)?,
167                Utf8View => parse_time_array(&array.as_string_view(), &formats)?,
168                other => return exec_err!("Unsupported type for to_time: {other}"),
169            };
170            Ok(ColumnarValue::Array(Arc::new(result)))
171        }
172        other => exec_err!("Unsupported argument for to_time: {other:?}"),
173    }
174}
175
176/// Collect format strings from arguments, erroring on non-scalar inputs
177fn collect_formats(args: &[ColumnarValue]) -> Result<Vec<&str>> {
178    if args.len() <= 1 {
179        return Ok(DEFAULT_TIME_FORMATS.to_vec());
180    }
181
182    let mut formats = Vec::with_capacity(args.len() - 1);
183    for (i, arg) in args[1..].iter().enumerate() {
184        match arg {
185            ColumnarValue::Scalar(ScalarValue::Utf8(Some(s)))
186            | ColumnarValue::Scalar(ScalarValue::LargeUtf8(Some(s)))
187            | ColumnarValue::Scalar(ScalarValue::Utf8View(Some(s))) => {
188                formats.push(s.as_str());
189            }
190            ColumnarValue::Scalar(ScalarValue::Utf8(None))
191            | ColumnarValue::Scalar(ScalarValue::LargeUtf8(None))
192            | ColumnarValue::Scalar(ScalarValue::Utf8View(None)) => {
193                // Skip null format strings
194            }
195            ColumnarValue::Array(_) => {
196                return exec_err!(
197                    "to_time format argument {} must be a scalar, not an array",
198                    i + 2 // argument position (1-indexed, +1 for the first arg)
199                );
200            }
201            other => {
202                return exec_err!(
203                    "to_time format argument {} has unsupported type: {:?}",
204                    i + 2,
205                    other.data_type()
206                );
207            }
208        }
209    }
210    Ok(formats)
211}
212
213/// Extract time portion from timestamp using Arrow cast kernel
214fn timestamp_to_time(arg: &ColumnarValue) -> Result<ColumnarValue> {
215    arg.cast_to(&Time64(arrow::datatypes::TimeUnit::Nanosecond), None)
216}
217
218/// Parse time array using the provided formats
219fn parse_time_array<'a, A: StringArrayType<'a>>(
220    array: &A,
221    formats: &[&str],
222) -> Result<PrimitiveArray<Time64NanosecondType>> {
223    let mut builder: PrimitiveBuilder<Time64NanosecondType> =
224        PrimitiveArray::builder(array.len());
225
226    for i in 0..array.len() {
227        if array.is_null(i) {
228            builder.append_null();
229        } else {
230            let s = array.value(i);
231            let nanos = parse_time_with_formats(s, formats)?;
232            builder.append_value(nanos);
233        }
234    }
235
236    Ok(builder.finish())
237}
238
239/// Parse time string using provided formats
240fn parse_time_with_formats(s: &str, formats: &[&str]) -> Result<i64> {
241    for format in formats {
242        if let Ok(time) = NaiveTime::parse_from_str(s, format) {
243            // Use Arrow's time_to_time64ns function instead of custom implementation
244            return Ok(time_to_time64ns(time));
245        }
246    }
247    exec_err!(
248        "Error parsing '{}' as time. Tried formats: {:?}",
249        s,
250        formats
251    )
252}