Skip to main content

datafusion_functions/string/
to_hex.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use std::any::Any;
19use std::sync::Arc;
20
21use arrow::array::{Array, ArrayRef, StringArray};
22use arrow::buffer::{Buffer, OffsetBuffer};
23use arrow::datatypes::{
24    ArrowNativeType, ArrowPrimitiveType, DataType, Int8Type, Int16Type, Int32Type,
25    Int64Type, UInt8Type, UInt16Type, UInt32Type, UInt64Type,
26};
27use datafusion_common::cast::as_primitive_array;
28use datafusion_common::{Result, ScalarValue, exec_err, internal_err};
29use datafusion_expr::{
30    Coercion, ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl, Signature,
31    TypeSignatureClass, Volatility,
32};
33use datafusion_macros::user_doc;
34
/// Lowercase hexadecimal digits indexed by nibble value (`0..=15`).
///
/// Used as a lookup table so each 4-bit group of an integer maps to its ASCII
/// hex digit with a single array index.
const HEX_CHARS: &[u8; 16] = b"0123456789abcdef";
37
38/// Converts the number to its equivalent hexadecimal representation.
39/// to_hex(2147483647) = '7fffffff'
40fn to_hex_array<T: ArrowPrimitiveType>(array: &ArrayRef) -> Result<ArrayRef>
41where
42    T::Native: ToHex,
43{
44    let integer_array = as_primitive_array::<T>(array)?;
45    let len = integer_array.len();
46
47    // Max hex string length: 16 chars for u64/i64
48    let max_hex_len = T::Native::get_byte_width() * 2;
49
50    // Pre-allocate buffers - avoid the builder API overhead
51    let mut offsets: Vec<i32> = Vec::with_capacity(len + 1);
52    let mut values: Vec<u8> = Vec::with_capacity(len * max_hex_len);
53
54    // Reusable buffer for hex conversion
55    let mut hex_buffer = [0u8; 16];
56
57    // Start with offset 0
58    offsets.push(0);
59
60    // Process all values directly (including null slots - we write empty strings for nulls)
61    // The null bitmap will mark which entries are actually null
62    for value in integer_array.values() {
63        let hex_len = value.write_hex_to_buffer(&mut hex_buffer);
64        values.extend_from_slice(&hex_buffer[16 - hex_len..]);
65        offsets.push(values.len() as i32);
66    }
67
68    // Copy null bitmap from input (nulls pass through unchanged)
69    let nulls = integer_array.nulls().cloned();
70
71    // SAFETY: offsets are valid (monotonically increasing, last value equals values.len())
72    // and values contains valid UTF-8 (only ASCII hex digits)
73    let offsets =
74        unsafe { OffsetBuffer::new_unchecked(Buffer::from_vec(offsets).into()) };
75    let result = StringArray::new(offsets, Buffer::from_vec(values), nulls);
76
77    Ok(Arc::new(result) as ArrayRef)
78}
79
80#[inline]
81fn to_hex_scalar<T: ToHex>(value: T) -> String {
82    let mut hex_buffer = [0u8; 16];
83    let hex_len = value.write_hex_to_buffer(&mut hex_buffer);
84    // SAFETY: hex_buffer is ASCII hex digits
85    unsafe { std::str::from_utf8_unchecked(&hex_buffer[16 - hex_len..]).to_string() }
86}
87
/// Integer types that can render themselves as hexadecimal ASCII digits into
/// a fixed 16-byte scratch buffer.
///
/// Sixteen bytes is enough for the widest supported value: a 64-bit integer
/// has at most 16 hex digits.
trait ToHex: ArrowNativeType {
    /// Writes the hex representation of `self` into `buffer` and returns the
    /// number of hex digits written.
    ///
    /// The digits are right-aligned: they occupy `buffer[16 - len..]`, where
    /// `len` is the returned count. Callers should read only that suffix.
    fn write_hex_to_buffer(self, buffer: &mut [u8; 16]) -> usize;
}
94
95/// Write unsigned value to hex buffer and return the number of digits written.
96/// Digits are written right-aligned in the buffer.
97#[inline]
98fn write_unsigned_hex_to_buffer(value: u64, buffer: &mut [u8; 16]) -> usize {
99    if value == 0 {
100        buffer[15] = b'0';
101        return 1;
102    }
103
104    // Write hex digits from right to left
105    let mut pos = 16;
106    let mut v = value;
107    while v > 0 {
108        pos -= 1;
109        buffer[pos] = HEX_CHARS[(v & 0xf) as usize];
110        v >>= 4;
111    }
112
113    16 - pos
114}
115
116/// Write signed value to hex buffer (two's complement for negative) and return digit count
117#[inline]
118fn write_signed_hex_to_buffer(value: i64, buffer: &mut [u8; 16]) -> usize {
119    // For negative values, use two's complement representation (same as casting to u64)
120    write_unsigned_hex_to_buffer(value as u64, buffer)
121}
122
123impl ToHex for i8 {
124    #[inline]
125    fn write_hex_to_buffer(self, buffer: &mut [u8; 16]) -> usize {
126        write_signed_hex_to_buffer(self as i64, buffer)
127    }
128}
129
130impl ToHex for i16 {
131    #[inline]
132    fn write_hex_to_buffer(self, buffer: &mut [u8; 16]) -> usize {
133        write_signed_hex_to_buffer(self as i64, buffer)
134    }
135}
136
137impl ToHex for i32 {
138    #[inline]
139    fn write_hex_to_buffer(self, buffer: &mut [u8; 16]) -> usize {
140        write_signed_hex_to_buffer(self as i64, buffer)
141    }
142}
143
144impl ToHex for i64 {
145    #[inline]
146    fn write_hex_to_buffer(self, buffer: &mut [u8; 16]) -> usize {
147        write_signed_hex_to_buffer(self, buffer)
148    }
149}
150
151impl ToHex for u8 {
152    #[inline]
153    fn write_hex_to_buffer(self, buffer: &mut [u8; 16]) -> usize {
154        write_unsigned_hex_to_buffer(self as u64, buffer)
155    }
156}
157
158impl ToHex for u16 {
159    #[inline]
160    fn write_hex_to_buffer(self, buffer: &mut [u8; 16]) -> usize {
161        write_unsigned_hex_to_buffer(self as u64, buffer)
162    }
163}
164
165impl ToHex for u32 {
166    #[inline]
167    fn write_hex_to_buffer(self, buffer: &mut [u8; 16]) -> usize {
168        write_unsigned_hex_to_buffer(self as u64, buffer)
169    }
170}
171
172impl ToHex for u64 {
173    #[inline]
174    fn write_hex_to_buffer(self, buffer: &mut [u8; 16]) -> usize {
175        write_unsigned_hex_to_buffer(self, buffer)
176    }
177}
178
/// Scalar UDF implementing the SQL function `to_hex`, which renders an
/// integer as a hexadecimal string.
///
/// The user-facing documentation is supplied by the `user_doc` attribute
/// below.
#[user_doc(
    doc_section(label = "String Functions"),
    description = "Converts an integer to a hexadecimal string.",
    syntax_example = "to_hex(int)",
    sql_example = r#"```sql
> select to_hex(12345689);
+-------------------------+
| to_hex(Int64(12345689)) |
+-------------------------+
| bc6159                  |
+-------------------------+
```"#,
    standard_argument(name = "int", prefix = "Integer")
)]
#[derive(Debug, PartialEq, Eq, Hash)]
pub struct ToHexFunc {
    // Argument signature reported through `ScalarUDFImpl::signature`.
    signature: Signature,
}
197
198impl Default for ToHexFunc {
199    fn default() -> Self {
200        Self::new()
201    }
202}
203
204impl ToHexFunc {
205    pub fn new() -> Self {
206        Self {
207            signature: Signature::coercible(
208                vec![Coercion::new_exact(TypeSignatureClass::Integer)],
209                Volatility::Immutable,
210            ),
211        }
212    }
213}
214
215impl ScalarUDFImpl for ToHexFunc {
216    fn as_any(&self) -> &dyn Any {
217        self
218    }
219
220    fn name(&self) -> &str {
221        "to_hex"
222    }
223
224    fn signature(&self) -> &Signature {
225        &self.signature
226    }
227
228    fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
229        Ok(DataType::Utf8)
230    }
231
232    fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
233        let arg = &args.args[0];
234
235        match arg {
236            ColumnarValue::Scalar(ScalarValue::Int64(Some(v))) => Ok(
237                ColumnarValue::Scalar(ScalarValue::Utf8(Some(to_hex_scalar(*v)))),
238            ),
239            ColumnarValue::Scalar(ScalarValue::UInt64(Some(v))) => Ok(
240                ColumnarValue::Scalar(ScalarValue::Utf8(Some(to_hex_scalar(*v)))),
241            ),
242            ColumnarValue::Scalar(ScalarValue::Int32(Some(v))) => Ok(
243                ColumnarValue::Scalar(ScalarValue::Utf8(Some(to_hex_scalar(*v)))),
244            ),
245            ColumnarValue::Scalar(ScalarValue::UInt32(Some(v))) => Ok(
246                ColumnarValue::Scalar(ScalarValue::Utf8(Some(to_hex_scalar(*v)))),
247            ),
248            ColumnarValue::Scalar(ScalarValue::Int16(Some(v))) => Ok(
249                ColumnarValue::Scalar(ScalarValue::Utf8(Some(to_hex_scalar(*v)))),
250            ),
251            ColumnarValue::Scalar(ScalarValue::UInt16(Some(v))) => Ok(
252                ColumnarValue::Scalar(ScalarValue::Utf8(Some(to_hex_scalar(*v)))),
253            ),
254            ColumnarValue::Scalar(ScalarValue::Int8(Some(v))) => Ok(
255                ColumnarValue::Scalar(ScalarValue::Utf8(Some(to_hex_scalar(*v)))),
256            ),
257            ColumnarValue::Scalar(ScalarValue::UInt8(Some(v))) => Ok(
258                ColumnarValue::Scalar(ScalarValue::Utf8(Some(to_hex_scalar(*v)))),
259            ),
260
261            // NULL scalars
262            ColumnarValue::Scalar(s) if s.is_null() => {
263                Ok(ColumnarValue::Scalar(ScalarValue::Utf8(None)))
264            }
265
266            ColumnarValue::Array(array) => match array.data_type() {
267                DataType::Int64 => {
268                    Ok(ColumnarValue::Array(to_hex_array::<Int64Type>(array)?))
269                }
270                DataType::UInt64 => {
271                    Ok(ColumnarValue::Array(to_hex_array::<UInt64Type>(array)?))
272                }
273                DataType::Int32 => {
274                    Ok(ColumnarValue::Array(to_hex_array::<Int32Type>(array)?))
275                }
276                DataType::UInt32 => {
277                    Ok(ColumnarValue::Array(to_hex_array::<UInt32Type>(array)?))
278                }
279                DataType::Int16 => {
280                    Ok(ColumnarValue::Array(to_hex_array::<Int16Type>(array)?))
281                }
282                DataType::UInt16 => {
283                    Ok(ColumnarValue::Array(to_hex_array::<UInt16Type>(array)?))
284                }
285                DataType::Int8 => {
286                    Ok(ColumnarValue::Array(to_hex_array::<Int8Type>(array)?))
287                }
288                DataType::UInt8 => {
289                    Ok(ColumnarValue::Array(to_hex_array::<UInt8Type>(array)?))
290                }
291                other => exec_err!("Unsupported data type {other:?} for function to_hex"),
292            },
293
294            other => internal_err!(
295                "Unexpected argument type {:?} for function to_hex",
296                other.data_type()
297            ),
298        }
299    }
300
301    fn documentation(&self) -> Option<&Documentation> {
302        self.doc()
303    }
304}
305
#[cfg(test)]
mod tests {
    use arrow::array::{
        Int8Array, Int16Array, Int32Array, Int64Array, StringArray, UInt8Array,
        UInt16Array, UInt32Array, UInt64Array,
    };
    use datafusion_common::cast::as_string_array;

    use super::*;

    /// Generates a `#[test]` that feeds an integer array through
    /// `to_hex_array` and compares the result with the expected strings.
    macro_rules! test_to_hex_type {
        // Short form: uses a shared input (a positive value, zero and a null)
        // that is valid for every integer type.
        ($test_fn:ident, $primitive:ty, $array:ty) => {
            test_to_hex_type!(
                $test_fn,
                $primitive,
                $array,
                vec![Some(100), Some(0), None],
                vec![Some("64"), Some("0"), None]
            );
        };

        // Full form: the caller provides the input values and expected hex.
        ($test_fn:ident, $primitive:ty, $array:ty, $values:expr, $hex:expr) => {
            #[test]
            fn $test_fn() -> Result<()> {
                let source: ArrayRef = Arc::new(<$array>::from($values));
                let want = StringArray::from($hex);

                let produced = to_hex_array::<$primitive>(&source)?;
                let got = as_string_array(&produced)?;

                assert_eq!(&want, got);
                Ok(())
            }
        };
    }

    // Signed types: -1 is sign-extended to 64 bits, hence sixteen `f` digits
    // regardless of the input width.
    test_to_hex_type!(
        to_hex_int8,
        Int8Type,
        Int8Array,
        vec![Some(100), Some(0), None, Some(-1)],
        vec![Some("64"), Some("0"), None, Some("ffffffffffffffff")]
    );
    test_to_hex_type!(
        to_hex_int16,
        Int16Type,
        Int16Array,
        vec![Some(100), Some(0), None, Some(-1)],
        vec![Some("64"), Some("0"), None, Some("ffffffffffffffff")]
    );
    test_to_hex_type!(
        to_hex_int32,
        Int32Type,
        Int32Array,
        vec![Some(100), Some(0), None, Some(-1)],
        vec![Some("64"), Some("0"), None, Some("ffffffffffffffff")]
    );
    test_to_hex_type!(
        to_hex_int64,
        Int64Type,
        Int64Array,
        vec![Some(100), Some(0), None, Some(-1)],
        vec![Some("64"), Some("0"), None, Some("ffffffffffffffff")]
    );

    // Unsigned types use the shared default input.
    test_to_hex_type!(to_hex_uint8, UInt8Type, UInt8Array);
    test_to_hex_type!(to_hex_uint16, UInt16Type, UInt16Array);
    test_to_hex_type!(to_hex_uint32, UInt32Type, UInt32Array);
    test_to_hex_type!(to_hex_uint64, UInt64Type, UInt64Array);

    // Extremes of the 64-bit ranges.
    test_to_hex_type!(
        to_hex_large_signed,
        Int64Type,
        Int64Array,
        vec![Some(i64::MAX), Some(i64::MIN)],
        vec![Some("7fffffffffffffff"), Some("8000000000000000")]
    );

    test_to_hex_type!(
        to_hex_large_unsigned,
        UInt64Type,
        UInt64Array,
        vec![Some(u64::MAX), Some(u64::MIN)],
        vec![Some("ffffffffffffffff"), Some("0")]
    );
}
392        UInt64Array,
393        vec![Some(u64::MAX), Some(u64::MIN)],
394        vec![Some("ffffffffffffffff"), Some("0")]
395    );
396}