datafusion_functions/string/
to_hex.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use std::any::Any;
19use std::sync::Arc;
20
21use crate::utils::make_scalar_function;
22use arrow::array::{Array, ArrayRef, StringArray};
23use arrow::buffer::{Buffer, OffsetBuffer};
24use arrow::datatypes::{
25    ArrowNativeType, ArrowPrimitiveType, DataType, Int8Type, Int16Type, Int32Type,
26    Int64Type, UInt8Type, UInt16Type, UInt32Type, UInt64Type,
27};
28use datafusion_common::cast::as_primitive_array;
29use datafusion_common::{Result, ScalarValue, exec_err};
30use datafusion_expr::{
31    Coercion, ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl, Signature,
32    TypeSignatureClass, Volatility,
33};
34use datafusion_macros::user_doc;
35
/// Lowercase hexadecimal digit lookup table: `HEX_CHARS[n]` is the ASCII
/// character for the 4-bit value `n` (0..=15).
const HEX_CHARS: &[u8; 16] = b"0123456789abcdef";
38
39/// Converts the number to its equivalent hexadecimal representation.
40/// to_hex(2147483647) = '7fffffff'
41fn to_hex<T: ArrowPrimitiveType>(args: &[ArrayRef]) -> Result<ArrayRef>
42where
43    T::Native: ToHex,
44{
45    let integer_array = as_primitive_array::<T>(&args[0])?;
46    let len = integer_array.len();
47
48    // Max hex string length: 16 chars for u64/i64
49    let max_hex_len = T::Native::get_byte_width() * 2;
50
51    // Pre-allocate buffers - avoid the builder API overhead
52    let mut offsets: Vec<i32> = Vec::with_capacity(len + 1);
53    let mut values: Vec<u8> = Vec::with_capacity(len * max_hex_len);
54
55    // Reusable buffer for hex conversion
56    let mut hex_buffer = [0u8; 16];
57
58    // Start with offset 0
59    offsets.push(0);
60
61    // Process all values directly (including null slots - we write empty strings for nulls)
62    // The null bitmap will mark which entries are actually null
63    for value in integer_array.values() {
64        let hex_len = value.write_hex_to_buffer(&mut hex_buffer);
65        values.extend_from_slice(&hex_buffer[16 - hex_len..]);
66        offsets.push(values.len() as i32);
67    }
68
69    // Copy null bitmap from input (nulls pass through unchanged)
70    let nulls = integer_array.nulls().cloned();
71
72    // SAFETY: offsets are valid (monotonically increasing, last value equals values.len())
73    // and values contains valid UTF-8 (only ASCII hex digits)
74    let offsets =
75        unsafe { OffsetBuffer::new_unchecked(Buffer::from_vec(offsets).into()) };
76    let result = StringArray::new(offsets, Buffer::from_vec(values), nulls);
77
78    Ok(Arc::new(result) as ArrayRef)
79}
80
/// Integer types that can render themselves as hexadecimal digits into a
/// fixed 16-byte scratch buffer.
trait ToHex: ArrowNativeType {
    /// Writes the hex representation of `self` into `buffer` and returns the
    /// number of hex digits written.
    ///
    /// The digits are right-aligned: with `len` being the returned count, they
    /// occupy `buffer[16 - len..]`, so callers read the result from that tail
    /// slice.
    fn write_hex_to_buffer(self, buffer: &mut [u8; 16]) -> usize;
}
87
88/// Write unsigned value to hex buffer and return the number of digits written.
89/// Digits are written right-aligned in the buffer.
90#[inline]
91fn write_unsigned_hex_to_buffer(value: u64, buffer: &mut [u8; 16]) -> usize {
92    if value == 0 {
93        buffer[15] = b'0';
94        return 1;
95    }
96
97    // Write hex digits from right to left
98    let mut pos = 16;
99    let mut v = value;
100    while v > 0 {
101        pos -= 1;
102        buffer[pos] = HEX_CHARS[(v & 0xf) as usize];
103        v >>= 4;
104    }
105
106    16 - pos
107}
108
109/// Write signed value to hex buffer (two's complement for negative) and return digit count
110#[inline]
111fn write_signed_hex_to_buffer(value: i64, buffer: &mut [u8; 16]) -> usize {
112    // For negative values, use two's complement representation (same as casting to u64)
113    write_unsigned_hex_to_buffer(value as u64, buffer)
114}
115
116impl ToHex for i8 {
117    #[inline]
118    fn write_hex_to_buffer(self, buffer: &mut [u8; 16]) -> usize {
119        write_signed_hex_to_buffer(self as i64, buffer)
120    }
121}
122
123impl ToHex for i16 {
124    #[inline]
125    fn write_hex_to_buffer(self, buffer: &mut [u8; 16]) -> usize {
126        write_signed_hex_to_buffer(self as i64, buffer)
127    }
128}
129
130impl ToHex for i32 {
131    #[inline]
132    fn write_hex_to_buffer(self, buffer: &mut [u8; 16]) -> usize {
133        write_signed_hex_to_buffer(self as i64, buffer)
134    }
135}
136
137impl ToHex for i64 {
138    #[inline]
139    fn write_hex_to_buffer(self, buffer: &mut [u8; 16]) -> usize {
140        write_signed_hex_to_buffer(self, buffer)
141    }
142}
143
144impl ToHex for u8 {
145    #[inline]
146    fn write_hex_to_buffer(self, buffer: &mut [u8; 16]) -> usize {
147        write_unsigned_hex_to_buffer(self as u64, buffer)
148    }
149}
150
151impl ToHex for u16 {
152    #[inline]
153    fn write_hex_to_buffer(self, buffer: &mut [u8; 16]) -> usize {
154        write_unsigned_hex_to_buffer(self as u64, buffer)
155    }
156}
157
158impl ToHex for u32 {
159    #[inline]
160    fn write_hex_to_buffer(self, buffer: &mut [u8; 16]) -> usize {
161        write_unsigned_hex_to_buffer(self as u64, buffer)
162    }
163}
164
165impl ToHex for u64 {
166    #[inline]
167    fn write_hex_to_buffer(self, buffer: &mut [u8; 16]) -> usize {
168        write_unsigned_hex_to_buffer(self, buffer)
169    }
170}
171
#[user_doc(
    doc_section(label = "String Functions"),
    description = "Converts an integer to a hexadecimal string.",
    syntax_example = "to_hex(int)",
    sql_example = r#"```sql
> select to_hex(12345689);
+-------------------------+
| to_hex(Int64(12345689)) |
+-------------------------+
| bc6159                  |
+-------------------------+
```"#,
    standard_argument(name = "int", prefix = "Integer")
)]
/// Scalar UDF implementing the SQL `to_hex` function, which converts an
/// integer to its hexadecimal string.
#[derive(Debug, PartialEq, Eq, Hash)]
pub struct ToHexFunc {
    /// Accepted argument types and volatility, built in [`ToHexFunc::new`].
    signature: Signature,
}
190
191impl Default for ToHexFunc {
192    fn default() -> Self {
193        Self::new()
194    }
195}
196
197impl ToHexFunc {
198    pub fn new() -> Self {
199        Self {
200            signature: Signature::coercible(
201                vec![Coercion::new_exact(TypeSignatureClass::Integer)],
202                Volatility::Immutable,
203            ),
204        }
205    }
206}
207
208impl ScalarUDFImpl for ToHexFunc {
209    fn as_any(&self) -> &dyn Any {
210        self
211    }
212
213    fn name(&self) -> &str {
214        "to_hex"
215    }
216
217    fn signature(&self) -> &Signature {
218        &self.signature
219    }
220
221    fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
222        Ok(DataType::Utf8)
223    }
224
225    fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
226        match args.args[0].data_type() {
227            DataType::Null => Ok(ColumnarValue::Scalar(ScalarValue::Utf8(None))),
228            DataType::Int64 => {
229                make_scalar_function(to_hex::<Int64Type>, vec![])(&args.args)
230            }
231            DataType::UInt64 => {
232                make_scalar_function(to_hex::<UInt64Type>, vec![])(&args.args)
233            }
234            DataType::Int32 => {
235                make_scalar_function(to_hex::<Int32Type>, vec![])(&args.args)
236            }
237            DataType::UInt32 => {
238                make_scalar_function(to_hex::<UInt32Type>, vec![])(&args.args)
239            }
240            DataType::Int16 => {
241                make_scalar_function(to_hex::<Int16Type>, vec![])(&args.args)
242            }
243            DataType::UInt16 => {
244                make_scalar_function(to_hex::<UInt16Type>, vec![])(&args.args)
245            }
246            DataType::Int8 => {
247                make_scalar_function(to_hex::<Int8Type>, vec![])(&args.args)
248            }
249            DataType::UInt8 => {
250                make_scalar_function(to_hex::<UInt8Type>, vec![])(&args.args)
251            }
252            other => exec_err!("Unsupported data type {other:?} for function to_hex"),
253        }
254    }
255
256    fn documentation(&self) -> Option<&Documentation> {
257        self.doc()
258    }
259}
260
// Unit tests that call the `to_hex` kernel directly on small arrays of each
// supported integer type and compare the result with the expected strings.
#[cfg(test)]
mod tests {
    use arrow::array::{
        Int8Array, Int16Array, Int32Array, Int64Array, StringArray, UInt8Array,
        UInt16Array, UInt32Array, UInt64Array,
    };
    use datafusion_common::cast::as_string_array;

    use super::*;

    // Generates one `#[test]` function named `$name` that runs
    // `to_hex::<$arrow_type>` over an `$array_type` built from the input and
    // asserts that the output equals the expected `StringArray`.
    macro_rules! test_to_hex_type {
        // Default test with standard input/output: a positive value, zero and a null
        ($name:ident, $arrow_type:ty, $array_type:ty) => {
            test_to_hex_type!(
                $name,
                $arrow_type,
                $array_type,
                vec![Some(100), Some(0), None],
                vec![Some("64"), Some("0"), None]
            );
        };

        // Custom test with caller-supplied input/output (e.g. negative numbers
        // or the extremes of a type)
        ($name:ident, $arrow_type:ty, $array_type:ty, $input:expr, $expected:expr) => {
            #[test]
            fn $name() -> Result<()> {
                let input = $input;
                let expected = $expected;

                let array = <$array_type>::from(input);
                let array_ref = Arc::new(array);
                let hex_result = to_hex::<$arrow_type>(&[array_ref])?;
                let hex_array = as_string_array(&hex_result)?;
                let expected_array = StringArray::from(expected);

                assert_eq!(&expected_array, hex_array);
                Ok(())
            }
        };
    }

    // Signed types: -1 is expected as sixteen `f` digits for every width,
    // because narrower values are sign-extended to 64 bits before formatting.
    test_to_hex_type!(
        to_hex_int8,
        Int8Type,
        Int8Array,
        vec![Some(100), Some(0), None, Some(-1)],
        vec![Some("64"), Some("0"), None, Some("ffffffffffffffff")]
    );
    test_to_hex_type!(
        to_hex_int16,
        Int16Type,
        Int16Array,
        vec![Some(100), Some(0), None, Some(-1)],
        vec![Some("64"), Some("0"), None, Some("ffffffffffffffff")]
    );
    test_to_hex_type!(
        to_hex_int32,
        Int32Type,
        Int32Array,
        vec![Some(100), Some(0), None, Some(-1)],
        vec![Some("64"), Some("0"), None, Some("ffffffffffffffff")]
    );
    test_to_hex_type!(
        to_hex_int64,
        Int64Type,
        Int64Array,
        vec![Some(100), Some(0), None, Some(-1)],
        vec![Some("64"), Some("0"), None, Some("ffffffffffffffff")]
    );

    // Unsigned types use the default input/output pair
    test_to_hex_type!(to_hex_uint8, UInt8Type, UInt8Array);
    test_to_hex_type!(to_hex_uint16, UInt16Type, UInt16Array);
    test_to_hex_type!(to_hex_uint32, UInt32Type, UInt32Array);
    test_to_hex_type!(to_hex_uint64, UInt64Type, UInt64Array);

    // Extremes of the 64-bit signed range
    test_to_hex_type!(
        to_hex_large_signed,
        Int64Type,
        Int64Array,
        vec![Some(i64::MAX), Some(i64::MIN)],
        vec![Some("7fffffffffffffff"), Some("8000000000000000")]
    );

    // Extremes of the 64-bit unsigned range
    test_to_hex_type!(
        to_hex_large_unsigned,
        UInt64Type,
        UInt64Array,
        vec![Some(u64::MAX), Some(u64::MIN)],
        vec![Some("ffffffffffffffff"), Some("0")]
    );
}