Skip to main content

datafusion_functions/datetime/
to_time.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::datetime::common::*;
19use arrow::array::cast::AsArray;
20use arrow::array::temporal_conversions::time_to_time64ns;
21use arrow::array::types::Time64NanosecondType;
22use arrow::array::{Array, PrimitiveArray, StringArrayType};
23use arrow::datatypes::DataType;
24use arrow::datatypes::DataType::*;
25use chrono::NaiveTime;
26use datafusion_common::{Result, ScalarValue, exec_err};
27use datafusion_expr::{
28    ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl, Signature,
29    Volatility,
30};
31use datafusion_macros::user_doc;
32use std::sync::Arc;
33
/// Default time formats to try when parsing without an explicit format.
///
/// `collect_formats` returns this list when `to_time` is called with only the
/// value argument; `parse_time_with_formats` then tries the entries in the
/// order listed and uses the first one that parses successfully.
const DEFAULT_TIME_FORMATS: &[&str] = &[
    "%H:%M:%S%.f", // 12:30:45.123456789
    "%H:%M:%S",    // 12:30:45
    "%H:%M",       // 12:30
];
40
/// Scalar UDF backing the `to_time` SQL function.
///
/// The user-facing documentation is declared in the `user_doc` attribute
/// below; the evaluation logic lives in this type's `ScalarUDFImpl`
/// implementation.
#[user_doc(
    doc_section(label = "Time and Date Functions"),
    description = r"Converts a value to a time (`HH:MM:SS.nnnnnnnnn`).
Supports strings and timestamps as input.
Strings are parsed as `HH:MM:SS`, `HH:MM:SS.nnnnnnnnn`, or `HH:MM` if no [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)s are provided.
Timestamps will have the time portion extracted.
Returns the corresponding time.

Note: `to_time` returns Time64(Nanosecond), which represents the time of day in nanoseconds since midnight.",
    syntax_example = "to_time('12:30:45', '%H:%M:%S')",
    sql_example = r#"```sql
> select to_time('12:30:45');
+---------------------------+
| to_time(Utf8("12:30:45")) |
+---------------------------+
| 12:30:45                  |
+---------------------------+
> select to_time('12-30-45', '%H-%M-%S');
+--------------------------------------------+
| to_time(Utf8("12-30-45"),Utf8("%H-%M-%S")) |
+--------------------------------------------+
| 12:30:45                                   |
+--------------------------------------------+
> select to_time('2024-01-15 14:30:45'::timestamp);
+--------------------------------------------------+
| to_time(Utf8("2024-01-15 14:30:45"))             |
+--------------------------------------------------+
| 14:30:45                                         |
+--------------------------------------------------+
```

Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/builtin_functions/date_time.rs)
"#,
    standard_argument(name = "expression", prefix = "String or Timestamp"),
    argument(
        name = "format_n",
        description = r"Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order
  they appear with the first successful one being returned. If none of the formats successfully parse the expression
  an error will be returned."
    )
)]
#[derive(Debug, PartialEq, Eq, Hash)]
pub struct ToTimeFunc {
    /// Argument signature handed out by `ScalarUDFImpl::signature`.
    signature: Signature,
}
86
87impl Default for ToTimeFunc {
88    fn default() -> Self {
89        Self::new()
90    }
91}
92
93impl ToTimeFunc {
94    pub fn new() -> Self {
95        Self {
96            signature: Signature::variadic_any(Volatility::Immutable),
97        }
98    }
99}
100
101impl ScalarUDFImpl for ToTimeFunc {
102    fn name(&self) -> &str {
103        "to_time"
104    }
105
106    fn signature(&self) -> &Signature {
107        &self.signature
108    }
109
110    fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
111        Ok(Time64(arrow::datatypes::TimeUnit::Nanosecond))
112    }
113
114    fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
115        let args = args.args;
116        if args.is_empty() {
117            return exec_err!("to_time function requires 1 or more arguments, got 0");
118        }
119
120        // validate that any args after the first one are Utf8
121        if args.len() > 1 {
122            validate_data_types(&args, "to_time")?;
123        }
124
125        match args[0].data_type() {
126            Utf8View | LargeUtf8 | Utf8 => string_to_time(&args),
127            Null => Ok(ColumnarValue::Scalar(ScalarValue::Time64Nanosecond(None))),
128            // Support timestamp input by extracting time portion using Arrow cast
129            Timestamp(_, _) => timestamp_to_time(&args[0]),
130            other => {
131                exec_err!("Unsupported data type {} for function to_time", other)
132            }
133        }
134    }
135
136    fn documentation(&self) -> Option<&Documentation> {
137        self.doc()
138    }
139}
140
141/// Convert string arguments to time (standalone function, not a method on ToTimeFunc)
142fn string_to_time(args: &[ColumnarValue]) -> Result<ColumnarValue> {
143    let formats = collect_formats(args)?;
144
145    match &args[0] {
146        ColumnarValue::Scalar(ScalarValue::Utf8(s))
147        | ColumnarValue::Scalar(ScalarValue::LargeUtf8(s))
148        | ColumnarValue::Scalar(ScalarValue::Utf8View(s)) => {
149            let result = s
150                .as_ref()
151                .map(|s| parse_time_with_formats(s, &formats))
152                .transpose()?;
153            Ok(ColumnarValue::Scalar(ScalarValue::Time64Nanosecond(result)))
154        }
155        ColumnarValue::Array(array) => {
156            let result = match array.data_type() {
157                Utf8 => parse_time_array(&array.as_string::<i32>(), &formats)?,
158                LargeUtf8 => parse_time_array(&array.as_string::<i64>(), &formats)?,
159                Utf8View => parse_time_array(&array.as_string_view(), &formats)?,
160                other => return exec_err!("Unsupported type for to_time: {other}"),
161            };
162            Ok(ColumnarValue::Array(Arc::new(result)))
163        }
164        other => exec_err!("Unsupported argument for to_time: {other:?}"),
165    }
166}
167
168/// Collect format strings from arguments, erroring on non-scalar inputs
169fn collect_formats(args: &[ColumnarValue]) -> Result<Vec<&str>> {
170    if args.len() <= 1 {
171        return Ok(DEFAULT_TIME_FORMATS.to_vec());
172    }
173
174    let mut formats = Vec::with_capacity(args.len() - 1);
175    for (i, arg) in args[1..].iter().enumerate() {
176        match arg {
177            ColumnarValue::Scalar(ScalarValue::Utf8(Some(s)))
178            | ColumnarValue::Scalar(ScalarValue::LargeUtf8(Some(s)))
179            | ColumnarValue::Scalar(ScalarValue::Utf8View(Some(s))) => {
180                formats.push(s.as_str());
181            }
182            ColumnarValue::Scalar(ScalarValue::Utf8(None))
183            | ColumnarValue::Scalar(ScalarValue::LargeUtf8(None))
184            | ColumnarValue::Scalar(ScalarValue::Utf8View(None)) => {
185                // Skip null format strings
186            }
187            ColumnarValue::Array(_) => {
188                return exec_err!(
189                    "to_time format argument {} must be a scalar, not an array",
190                    i + 2 // argument position (1-indexed, +1 for the first arg)
191                );
192            }
193            other => {
194                return exec_err!(
195                    "to_time format argument {} has unsupported type: {:?}",
196                    i + 2,
197                    other.data_type()
198                );
199            }
200        }
201    }
202    Ok(formats)
203}
204
205/// Extract time portion from timestamp using Arrow cast kernel
206fn timestamp_to_time(arg: &ColumnarValue) -> Result<ColumnarValue> {
207    arg.cast_to(&Time64(arrow::datatypes::TimeUnit::Nanosecond), None)
208}
209
210/// Parse time array using the provided formats
211fn parse_time_array<'a, A: StringArrayType<'a>>(
212    array: &A,
213    formats: &[&str],
214) -> Result<PrimitiveArray<Time64NanosecondType>> {
215    let mut values = Vec::with_capacity(array.len());
216    for i in 0..array.len() {
217        if array.is_null(i) {
218            values.push(0);
219        } else {
220            values.push(parse_time_with_formats(array.value(i), formats)?);
221        }
222    }
223    Ok(PrimitiveArray::new(values.into(), array.nulls().cloned()))
224}
225
226/// Parse time string using provided formats
227fn parse_time_with_formats(s: &str, formats: &[&str]) -> Result<i64> {
228    for format in formats {
229        if let Ok(time) = NaiveTime::parse_from_str(s, format) {
230            // Use Arrow's time_to_time64ns function instead of custom implementation
231            return Ok(time_to_time64ns(time));
232        }
233    }
234    exec_err!(
235        "Error parsing '{}' as time. Tried formats: {:?}",
236        s,
237        formats
238    )
239}