datafusion_functions/string/
to_hex.rs1use std::any::Any;
19use std::sync::Arc;
20
21use crate::utils::make_scalar_function;
22use arrow::array::{Array, ArrayRef, StringArray};
23use arrow::buffer::{Buffer, OffsetBuffer};
24use arrow::datatypes::{
25 ArrowNativeType, ArrowPrimitiveType, DataType, Int8Type, Int16Type, Int32Type,
26 Int64Type, UInt8Type, UInt16Type, UInt32Type, UInt64Type,
27};
28use datafusion_common::cast::as_primitive_array;
29use datafusion_common::{Result, ScalarValue, exec_err};
30use datafusion_expr::{
31 Coercion, ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl, Signature,
32 TypeSignatureClass, Volatility,
33};
34use datafusion_macros::user_doc;
35
/// Lowercase hexadecimal digits, indexed by nibble value (`0..=15`).
const HEX_CHARS: &[u8; 16] = b"0123456789abcdef";
38
39fn to_hex<T: ArrowPrimitiveType>(args: &[ArrayRef]) -> Result<ArrayRef>
42where
43 T::Native: ToHex,
44{
45 let integer_array = as_primitive_array::<T>(&args[0])?;
46 let len = integer_array.len();
47
48 let max_hex_len = T::Native::get_byte_width() * 2;
50
51 let mut offsets: Vec<i32> = Vec::with_capacity(len + 1);
53 let mut values: Vec<u8> = Vec::with_capacity(len * max_hex_len);
54
55 let mut hex_buffer = [0u8; 16];
57
58 offsets.push(0);
60
61 for value in integer_array.values() {
64 let hex_len = value.write_hex_to_buffer(&mut hex_buffer);
65 values.extend_from_slice(&hex_buffer[16 - hex_len..]);
66 offsets.push(values.len() as i32);
67 }
68
69 let nulls = integer_array.nulls().cloned();
71
72 let offsets =
75 unsafe { OffsetBuffer::new_unchecked(Buffer::from_vec(offsets).into()) };
76 let result = StringArray::new(offsets, Buffer::from_vec(values), nulls);
77
78 Ok(Arc::new(result) as ArrayRef)
79}
80
/// Integer types that can be rendered as lowercase hexadecimal digits.
trait ToHex: ArrowNativeType {
    /// Writes the hex digits of `self` right-aligned into `buffer` and returns
    /// how many digits were written; the digits occupy the last `n` bytes of
    /// `buffer`, where `n` is the returned count.
    fn write_hex_to_buffer(self, buffer: &mut [u8; 16]) -> usize;
}
87
88#[inline]
91fn write_unsigned_hex_to_buffer(value: u64, buffer: &mut [u8; 16]) -> usize {
92 if value == 0 {
93 buffer[15] = b'0';
94 return 1;
95 }
96
97 let mut pos = 16;
99 let mut v = value;
100 while v > 0 {
101 pos -= 1;
102 buffer[pos] = HEX_CHARS[(v & 0xf) as usize];
103 v >>= 4;
104 }
105
106 16 - pos
107}
108
109#[inline]
111fn write_signed_hex_to_buffer(value: i64, buffer: &mut [u8; 16]) -> usize {
112 write_unsigned_hex_to_buffer(value as u64, buffer)
114}
115
116impl ToHex for i8 {
117 #[inline]
118 fn write_hex_to_buffer(self, buffer: &mut [u8; 16]) -> usize {
119 write_signed_hex_to_buffer(self as i64, buffer)
120 }
121}
122
123impl ToHex for i16 {
124 #[inline]
125 fn write_hex_to_buffer(self, buffer: &mut [u8; 16]) -> usize {
126 write_signed_hex_to_buffer(self as i64, buffer)
127 }
128}
129
130impl ToHex for i32 {
131 #[inline]
132 fn write_hex_to_buffer(self, buffer: &mut [u8; 16]) -> usize {
133 write_signed_hex_to_buffer(self as i64, buffer)
134 }
135}
136
/// `i64` is formatted from its two's-complement bit pattern via
/// `write_signed_hex_to_buffer`.
impl ToHex for i64 {
    #[inline]
    fn write_hex_to_buffer(self, buffer: &mut [u8; 16]) -> usize {
        write_signed_hex_to_buffer(self, buffer)
    }
}
143
144impl ToHex for u8 {
145 #[inline]
146 fn write_hex_to_buffer(self, buffer: &mut [u8; 16]) -> usize {
147 write_unsigned_hex_to_buffer(self as u64, buffer)
148 }
149}
150
151impl ToHex for u16 {
152 #[inline]
153 fn write_hex_to_buffer(self, buffer: &mut [u8; 16]) -> usize {
154 write_unsigned_hex_to_buffer(self as u64, buffer)
155 }
156}
157
158impl ToHex for u32 {
159 #[inline]
160 fn write_hex_to_buffer(self, buffer: &mut [u8; 16]) -> usize {
161 write_unsigned_hex_to_buffer(self as u64, buffer)
162 }
163}
164
/// `u64` is formatted directly via `write_unsigned_hex_to_buffer`.
impl ToHex for u64 {
    #[inline]
    fn write_hex_to_buffer(self, buffer: &mut [u8; 16]) -> usize {
        write_unsigned_hex_to_buffer(self, buffer)
    }
}
171
172#[user_doc(
173 doc_section(label = "String Functions"),
174 description = "Converts an integer to a hexadecimal string.",
175 syntax_example = "to_hex(int)",
176 sql_example = r#"```sql
177> select to_hex(12345689);
178+-------------------------+
179| to_hex(Int64(12345689)) |
180+-------------------------+
181| bc6159 |
182+-------------------------+
183```"#,
184 standard_argument(name = "int", prefix = "Integer")
185)]
/// Scalar UDF implementing `to_hex`, which renders an integer as a
/// hexadecimal string.
#[derive(Debug, PartialEq, Eq, Hash)]
pub struct ToHexFunc {
    // Accepted argument types and volatility, built once in `new`.
    signature: Signature,
}
190
impl Default for ToHexFunc {
    /// Equivalent to [`ToHexFunc::new`].
    fn default() -> Self {
        Self::new()
    }
}
196
197impl ToHexFunc {
198 pub fn new() -> Self {
199 Self {
200 signature: Signature::coercible(
201 vec![Coercion::new_exact(TypeSignatureClass::Integer)],
202 Volatility::Immutable,
203 ),
204 }
205 }
206}
207
208impl ScalarUDFImpl for ToHexFunc {
209 fn as_any(&self) -> &dyn Any {
210 self
211 }
212
213 fn name(&self) -> &str {
214 "to_hex"
215 }
216
217 fn signature(&self) -> &Signature {
218 &self.signature
219 }
220
221 fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
222 Ok(DataType::Utf8)
223 }
224
225 fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
226 match args.args[0].data_type() {
227 DataType::Null => Ok(ColumnarValue::Scalar(ScalarValue::Utf8(None))),
228 DataType::Int64 => {
229 make_scalar_function(to_hex::<Int64Type>, vec![])(&args.args)
230 }
231 DataType::UInt64 => {
232 make_scalar_function(to_hex::<UInt64Type>, vec![])(&args.args)
233 }
234 DataType::Int32 => {
235 make_scalar_function(to_hex::<Int32Type>, vec![])(&args.args)
236 }
237 DataType::UInt32 => {
238 make_scalar_function(to_hex::<UInt32Type>, vec![])(&args.args)
239 }
240 DataType::Int16 => {
241 make_scalar_function(to_hex::<Int16Type>, vec![])(&args.args)
242 }
243 DataType::UInt16 => {
244 make_scalar_function(to_hex::<UInt16Type>, vec![])(&args.args)
245 }
246 DataType::Int8 => {
247 make_scalar_function(to_hex::<Int8Type>, vec![])(&args.args)
248 }
249 DataType::UInt8 => {
250 make_scalar_function(to_hex::<UInt8Type>, vec![])(&args.args)
251 }
252 other => exec_err!("Unsupported data type {other:?} for function to_hex"),
253 }
254 }
255
256 fn documentation(&self) -> Option<&Documentation> {
257 self.doc()
258 }
259}
260
#[cfg(test)]
mod tests {
    use arrow::array::{
        Int8Array, Int16Array, Int32Array, Int64Array, StringArray, UInt8Array,
        UInt16Array, UInt32Array, UInt64Array,
    };
    use datafusion_common::cast::as_string_array;

    use super::*;

    // Generates one `#[test]` function that runs `to_hex` over an array of the
    // given type and compares the result with the expected strings.
    macro_rules! test_to_hex_type {
        // Short form: uses a default input of a positive value, zero and a
        // null, then forwards to the full form below.
        ($name:ident, $arrow_type:ty, $array_type:ty) => {
            test_to_hex_type!(
                $name,
                $arrow_type,
                $array_type,
                vec![Some(100), Some(0), None],
                vec![Some("64"), Some("0"), None]
            );
        };

        // Full form: caller supplies both the input values and the expected
        // output strings.
        ($name:ident, $arrow_type:ty, $array_type:ty, $input:expr, $expected:expr) => {
            #[test]
            fn $name() -> Result<()> {
                let input = $input;
                let expected = $expected;

                let array = <$array_type>::from(input);
                let array_ref = Arc::new(array);
                let hex_result = to_hex::<$arrow_type>(&[array_ref])?;
                let hex_array = as_string_array(&hex_result)?;
                let expected_array = StringArray::from(expected);

                assert_eq!(&expected_array, hex_array);
                Ok(())
            }
        };
    }

    // Signed types: `-1` is expected as sixteen `f`s for every width, because
    // the signed `ToHex` impls widen to `i64` before formatting.
    test_to_hex_type!(
        to_hex_int8,
        Int8Type,
        Int8Array,
        vec![Some(100), Some(0), None, Some(-1)],
        vec![Some("64"), Some("0"), None, Some("ffffffffffffffff")]
    );
    test_to_hex_type!(
        to_hex_int16,
        Int16Type,
        Int16Array,
        vec![Some(100), Some(0), None, Some(-1)],
        vec![Some("64"), Some("0"), None, Some("ffffffffffffffff")]
    );
    test_to_hex_type!(
        to_hex_int32,
        Int32Type,
        Int32Array,
        vec![Some(100), Some(0), None, Some(-1)],
        vec![Some("64"), Some("0"), None, Some("ffffffffffffffff")]
    );
    test_to_hex_type!(
        to_hex_int64,
        Int64Type,
        Int64Array,
        vec![Some(100), Some(0), None, Some(-1)],
        vec![Some("64"), Some("0"), None, Some("ffffffffffffffff")]
    );

    // Unsigned types use the default input and expectations.
    test_to_hex_type!(to_hex_uint8, UInt8Type, UInt8Array);
    test_to_hex_type!(to_hex_uint16, UInt16Type, UInt16Array);
    test_to_hex_type!(to_hex_uint32, UInt32Type, UInt32Array);
    test_to_hex_type!(to_hex_uint64, UInt64Type, UInt64Array);

    // Extremes of the 64-bit ranges.
    test_to_hex_type!(
        to_hex_large_signed,
        Int64Type,
        Int64Array,
        vec![Some(i64::MAX), Some(i64::MIN)],
        vec![Some("7fffffffffffffff"), Some("8000000000000000")]
    );

    test_to_hex_type!(
        to_hex_large_unsigned,
        UInt64Type,
        UInt64Array,
        vec![Some(u64::MAX), Some(u64::MIN)],
        vec![Some("ffffffffffffffff"), Some("0")]
    );
}