Skip to main content

datafusion_functions/string/
to_hex.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use std::sync::Arc;
19
20use arrow::array::{Array, ArrayRef, StringArray};
21use arrow::buffer::{Buffer, OffsetBuffer};
22use arrow::datatypes::{
23    ArrowNativeType, ArrowPrimitiveType, DataType, Int8Type, Int16Type, Int32Type,
24    Int64Type, UInt8Type, UInt16Type, UInt32Type, UInt64Type,
25};
26use datafusion_common::cast::as_primitive_array;
27use datafusion_common::{Result, ScalarValue, exec_err, internal_err};
28use datafusion_expr::{
29    Coercion, ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl, Signature,
30    TypeSignatureClass, Volatility,
31};
32use datafusion_macros::user_doc;
33
/// Lookup table mapping a 4-bit value (`0..=15`) to its lowercase ASCII
/// hexadecimal digit.
const HEX_CHARS: &[u8; 16] = b"0123456789abcdef";
36
37/// Converts the number to its equivalent hexadecimal representation.
38/// to_hex(2147483647) = '7fffffff'
39fn to_hex_array<T: ArrowPrimitiveType>(array: &ArrayRef) -> Result<ArrayRef>
40where
41    T::Native: ToHex,
42{
43    let integer_array = as_primitive_array::<T>(array)?;
44    let len = integer_array.len();
45
46    // Max hex string length: 16 chars for u64/i64
47    let max_hex_len = T::Native::get_byte_width() * 2;
48
49    // Pre-allocate buffers - avoid the builder API overhead
50    let mut offsets: Vec<i32> = Vec::with_capacity(len + 1);
51    let mut values: Vec<u8> = Vec::with_capacity(len * max_hex_len);
52
53    // Reusable buffer for hex conversion
54    let mut hex_buffer = [0u8; 16];
55
56    // Start with offset 0
57    offsets.push(0);
58
59    // Process all values directly (including null slots - we write empty strings for nulls)
60    // The null bitmap will mark which entries are actually null
61    for value in integer_array.values() {
62        let hex_len = value.write_hex_to_buffer(&mut hex_buffer);
63        values.extend_from_slice(&hex_buffer[16 - hex_len..]);
64        offsets.push(values.len() as i32);
65    }
66
67    // Copy null bitmap from input (nulls pass through unchanged)
68    let nulls = integer_array.nulls().cloned();
69
70    // SAFETY: offsets are valid (monotonically increasing, last value equals values.len())
71    // and values contains valid UTF-8 (only ASCII hex digits)
72    let offsets =
73        unsafe { OffsetBuffer::new_unchecked(Buffer::from_vec(offsets).into()) };
74    let result = StringArray::new(offsets, Buffer::from_vec(values), nulls);
75
76    Ok(Arc::new(result) as ArrayRef)
77}
78
79#[inline]
80fn to_hex_scalar<T: ToHex>(value: T) -> String {
81    let mut hex_buffer = [0u8; 16];
82    let hex_len = value.write_hex_to_buffer(&mut hex_buffer);
83    // SAFETY: hex_buffer is ASCII hex digits
84    unsafe { std::str::from_utf8_unchecked(&hex_buffer[16 - hex_len..]).to_string() }
85}
86
/// Integer types that can write their hexadecimal representation into a
/// fixed 16-byte buffer (16 digits is enough for any 64-bit value).
trait ToHex: ArrowNativeType {
    /// Writes the hex representation of `self` into `buffer` and returns the
    /// number of hex digits written.
    ///
    /// The digits are right-aligned: callers read them from
    /// `buffer[16 - len..]`, where `len` is the returned count.
    fn write_hex_to_buffer(self, buffer: &mut [u8; 16]) -> usize;
}
93
94/// Write unsigned value to hex buffer and return the number of digits written.
95/// Digits are written right-aligned in the buffer.
96#[inline]
97fn write_unsigned_hex_to_buffer(value: u64, buffer: &mut [u8; 16]) -> usize {
98    if value == 0 {
99        buffer[15] = b'0';
100        return 1;
101    }
102
103    // Write hex digits from right to left
104    let mut pos = 16;
105    let mut v = value;
106    while v > 0 {
107        pos -= 1;
108        buffer[pos] = HEX_CHARS[(v & 0xf) as usize];
109        v >>= 4;
110    }
111
112    16 - pos
113}
114
115/// Write signed value to hex buffer (two's complement for negative) and return digit count
116#[inline]
117fn write_signed_hex_to_buffer(value: i64, buffer: &mut [u8; 16]) -> usize {
118    // For negative values, use two's complement representation (same as casting to u64)
119    write_unsigned_hex_to_buffer(value as u64, buffer)
120}
121
122impl ToHex for i8 {
123    #[inline]
124    fn write_hex_to_buffer(self, buffer: &mut [u8; 16]) -> usize {
125        write_signed_hex_to_buffer(self as i64, buffer)
126    }
127}
128
129impl ToHex for i16 {
130    #[inline]
131    fn write_hex_to_buffer(self, buffer: &mut [u8; 16]) -> usize {
132        write_signed_hex_to_buffer(self as i64, buffer)
133    }
134}
135
136impl ToHex for i32 {
137    #[inline]
138    fn write_hex_to_buffer(self, buffer: &mut [u8; 16]) -> usize {
139        write_signed_hex_to_buffer(self as i64, buffer)
140    }
141}
142
impl ToHex for i64 {
    /// Already 64 bits wide, so the value goes to the signed writer unchanged.
    #[inline]
    fn write_hex_to_buffer(self, buffer: &mut [u8; 16]) -> usize {
        write_signed_hex_to_buffer(self, buffer)
    }
}
149
150impl ToHex for u8 {
151    #[inline]
152    fn write_hex_to_buffer(self, buffer: &mut [u8; 16]) -> usize {
153        write_unsigned_hex_to_buffer(self as u64, buffer)
154    }
155}
156
157impl ToHex for u16 {
158    #[inline]
159    fn write_hex_to_buffer(self, buffer: &mut [u8; 16]) -> usize {
160        write_unsigned_hex_to_buffer(self as u64, buffer)
161    }
162}
163
164impl ToHex for u32 {
165    #[inline]
166    fn write_hex_to_buffer(self, buffer: &mut [u8; 16]) -> usize {
167        write_unsigned_hex_to_buffer(self as u64, buffer)
168    }
169}
170
impl ToHex for u64 {
    /// Already 64 bits wide, so the value goes to the unsigned writer unchanged.
    #[inline]
    fn write_hex_to_buffer(self, buffer: &mut [u8; 16]) -> usize {
        write_unsigned_hex_to_buffer(self, buffer)
    }
}
177
/// Scalar UDF implementing the `to_hex` string function, which converts an
/// integer to its hexadecimal string.
#[user_doc(
    doc_section(label = "String Functions"),
    description = "Converts an integer to a hexadecimal string.",
    syntax_example = "to_hex(int)",
    sql_example = r#"```sql
> select to_hex(12345689);
+-------------------------+
| to_hex(Int64(12345689)) |
+-------------------------+
| bc6159                  |
+-------------------------+
```"#,
    standard_argument(name = "int", prefix = "Integer")
)]
#[derive(Debug, PartialEq, Eq, Hash)]
pub struct ToHexFunc {
    /// Argument signature, returned by `ScalarUDFImpl::signature`.
    signature: Signature,
}
196
197impl Default for ToHexFunc {
198    fn default() -> Self {
199        Self::new()
200    }
201}
202
203impl ToHexFunc {
204    pub fn new() -> Self {
205        Self {
206            signature: Signature::coercible(
207                vec![Coercion::new_exact(TypeSignatureClass::Integer)],
208                Volatility::Immutable,
209            ),
210        }
211    }
212}
213
214impl ScalarUDFImpl for ToHexFunc {
215    fn name(&self) -> &str {
216        "to_hex"
217    }
218
219    fn signature(&self) -> &Signature {
220        &self.signature
221    }
222
223    fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
224        Ok(DataType::Utf8)
225    }
226
227    fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
228        let arg = &args.args[0];
229
230        match arg {
231            ColumnarValue::Scalar(ScalarValue::Int64(Some(v))) => Ok(
232                ColumnarValue::Scalar(ScalarValue::Utf8(Some(to_hex_scalar(*v)))),
233            ),
234            ColumnarValue::Scalar(ScalarValue::UInt64(Some(v))) => Ok(
235                ColumnarValue::Scalar(ScalarValue::Utf8(Some(to_hex_scalar(*v)))),
236            ),
237            ColumnarValue::Scalar(ScalarValue::Int32(Some(v))) => Ok(
238                ColumnarValue::Scalar(ScalarValue::Utf8(Some(to_hex_scalar(*v)))),
239            ),
240            ColumnarValue::Scalar(ScalarValue::UInt32(Some(v))) => Ok(
241                ColumnarValue::Scalar(ScalarValue::Utf8(Some(to_hex_scalar(*v)))),
242            ),
243            ColumnarValue::Scalar(ScalarValue::Int16(Some(v))) => Ok(
244                ColumnarValue::Scalar(ScalarValue::Utf8(Some(to_hex_scalar(*v)))),
245            ),
246            ColumnarValue::Scalar(ScalarValue::UInt16(Some(v))) => Ok(
247                ColumnarValue::Scalar(ScalarValue::Utf8(Some(to_hex_scalar(*v)))),
248            ),
249            ColumnarValue::Scalar(ScalarValue::Int8(Some(v))) => Ok(
250                ColumnarValue::Scalar(ScalarValue::Utf8(Some(to_hex_scalar(*v)))),
251            ),
252            ColumnarValue::Scalar(ScalarValue::UInt8(Some(v))) => Ok(
253                ColumnarValue::Scalar(ScalarValue::Utf8(Some(to_hex_scalar(*v)))),
254            ),
255
256            // NULL scalars
257            ColumnarValue::Scalar(s) if s.is_null() => {
258                Ok(ColumnarValue::Scalar(ScalarValue::Utf8(None)))
259            }
260
261            ColumnarValue::Array(array) => match array.data_type() {
262                DataType::Int64 => {
263                    Ok(ColumnarValue::Array(to_hex_array::<Int64Type>(array)?))
264                }
265                DataType::UInt64 => {
266                    Ok(ColumnarValue::Array(to_hex_array::<UInt64Type>(array)?))
267                }
268                DataType::Int32 => {
269                    Ok(ColumnarValue::Array(to_hex_array::<Int32Type>(array)?))
270                }
271                DataType::UInt32 => {
272                    Ok(ColumnarValue::Array(to_hex_array::<UInt32Type>(array)?))
273                }
274                DataType::Int16 => {
275                    Ok(ColumnarValue::Array(to_hex_array::<Int16Type>(array)?))
276                }
277                DataType::UInt16 => {
278                    Ok(ColumnarValue::Array(to_hex_array::<UInt16Type>(array)?))
279                }
280                DataType::Int8 => {
281                    Ok(ColumnarValue::Array(to_hex_array::<Int8Type>(array)?))
282                }
283                DataType::UInt8 => {
284                    Ok(ColumnarValue::Array(to_hex_array::<UInt8Type>(array)?))
285                }
286                other => exec_err!("Unsupported data type {other:?} for function to_hex"),
287            },
288
289            other => internal_err!(
290                "Unexpected argument type {:?} for function to_hex",
291                other.data_type()
292            ),
293        }
294    }
295
296    fn documentation(&self) -> Option<&Documentation> {
297        self.doc()
298    }
299}
300
#[cfg(test)]
mod tests {
    use arrow::array::{
        Int8Array, Int16Array, Int32Array, Int64Array, StringArray, UInt8Array,
        UInt16Array, UInt32Array, UInt64Array,
    };
    use datafusion_common::cast::as_string_array;

    use super::*;

    /// Generates a `#[test]` function named `$name` that runs `to_hex_array`
    /// for the given Arrow type over an input array and compares the result
    /// with the expected strings.
    macro_rules! test_to_hex_type {
        // Short form: uses a default input of a positive value, zero and a
        // null, then delegates to the full form below.
        ($name:ident, $arrow_type:ty, $array_type:ty) => {
            test_to_hex_type!(
                $name,
                $arrow_type,
                $array_type,
                vec![Some(100), Some(0), None],
                vec![Some("64"), Some("0"), None]
            );
        };

        // Full form: caller supplies the input and expected output (used for
        // negative values and the 64-bit extremes).
        ($name:ident, $arrow_type:ty, $array_type:ty, $input:expr, $expected:expr) => {
            #[test]
            fn $name() -> Result<()> {
                let input = $input;
                let expected = $expected;

                let array = <$array_type>::from(input);
                let array_ref: ArrayRef = Arc::new(array);
                let hex_result = to_hex_array::<$arrow_type>(&array_ref)?;
                let hex_array = as_string_array(&hex_result)?;
                let expected_array = StringArray::from(expected);

                assert_eq!(&expected_array, hex_array);
                Ok(())
            }
        };
    }

    // Signed types: -1 is expected as 16 'f' digits for every width because
    // signed values are widened to i64 before formatting.
    test_to_hex_type!(
        to_hex_int8,
        Int8Type,
        Int8Array,
        vec![Some(100), Some(0), None, Some(-1)],
        vec![Some("64"), Some("0"), None, Some("ffffffffffffffff")]
    );
    test_to_hex_type!(
        to_hex_int16,
        Int16Type,
        Int16Array,
        vec![Some(100), Some(0), None, Some(-1)],
        vec![Some("64"), Some("0"), None, Some("ffffffffffffffff")]
    );
    test_to_hex_type!(
        to_hex_int32,
        Int32Type,
        Int32Array,
        vec![Some(100), Some(0), None, Some(-1)],
        vec![Some("64"), Some("0"), None, Some("ffffffffffffffff")]
    );
    test_to_hex_type!(
        to_hex_int64,
        Int64Type,
        Int64Array,
        vec![Some(100), Some(0), None, Some(-1)],
        vec![Some("64"), Some("0"), None, Some("ffffffffffffffff")]
    );

    // Unsigned types use the default input/output of the short form.
    test_to_hex_type!(to_hex_uint8, UInt8Type, UInt8Array);
    test_to_hex_type!(to_hex_uint16, UInt16Type, UInt16Array);
    test_to_hex_type!(to_hex_uint32, UInt32Type, UInt32Array);
    test_to_hex_type!(to_hex_uint64, UInt64Type, UInt64Array);

    // Extremes of the signed 64-bit range.
    test_to_hex_type!(
        to_hex_large_signed,
        Int64Type,
        Int64Array,
        vec![Some(i64::MAX), Some(i64::MIN)],
        vec![Some("7fffffffffffffff"), Some("8000000000000000")]
    );

    // Extremes of the unsigned 64-bit range.
    test_to_hex_type!(
        to_hex_large_unsigned,
        UInt64Type,
        UInt64Array,
        vec![Some(u64::MAX), Some(u64::MIN)],
        vec![Some("ffffffffffffffff"), Some("0")]
    );
}