1use std::sync::Arc;
19
20use arrow::array::{Array, ArrayRef, StringArray};
21use arrow::buffer::{Buffer, OffsetBuffer};
22use arrow::datatypes::{
23 ArrowNativeType, ArrowPrimitiveType, DataType, Int8Type, Int16Type, Int32Type,
24 Int64Type, UInt8Type, UInt16Type, UInt32Type, UInt64Type,
25};
26use datafusion_common::cast::as_primitive_array;
27use datafusion_common::{Result, ScalarValue, exec_err, internal_err};
28use datafusion_expr::{
29 Coercion, ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl, Signature,
30 TypeSignatureClass, Volatility,
31};
32use datafusion_macros::user_doc;
33
/// Lookup table indexed by a nibble value (`0..=15`) that yields the matching
/// lowercase ASCII hexadecimal digit.
const HEX_CHARS: &[u8; 16] = b"0123456789abcdef";
36
37fn to_hex_array<T: ArrowPrimitiveType>(array: &ArrayRef) -> Result<ArrayRef>
40where
41 T::Native: ToHex,
42{
43 let integer_array = as_primitive_array::<T>(array)?;
44 let len = integer_array.len();
45
46 let max_hex_len = T::Native::get_byte_width() * 2;
48
49 let mut offsets: Vec<i32> = Vec::with_capacity(len + 1);
51 let mut values: Vec<u8> = Vec::with_capacity(len * max_hex_len);
52
53 let mut hex_buffer = [0u8; 16];
55
56 offsets.push(0);
58
59 for value in integer_array.values() {
62 let hex_len = value.write_hex_to_buffer(&mut hex_buffer);
63 values.extend_from_slice(&hex_buffer[16 - hex_len..]);
64 offsets.push(values.len() as i32);
65 }
66
67 let nulls = integer_array.nulls().cloned();
69
70 let offsets =
73 unsafe { OffsetBuffer::new_unchecked(Buffer::from_vec(offsets).into()) };
74 let result = StringArray::new(offsets, Buffer::from_vec(values), nulls);
75
76 Ok(Arc::new(result) as ArrayRef)
77}
78
79#[inline]
80fn to_hex_scalar<T: ToHex>(value: T) -> String {
81 let mut hex_buffer = [0u8; 16];
82 let hex_len = value.write_hex_to_buffer(&mut hex_buffer);
83 unsafe { std::str::from_utf8_unchecked(&hex_buffer[16 - hex_len..]).to_string() }
85}
86
/// Integer native types that can be rendered as lowercase hexadecimal text.
trait ToHex: ArrowNativeType {
    /// Writes the hex digits of `self` right-aligned into `buffer` and returns
    /// the number of bytes written, `n`. Callers read the digits from
    /// `buffer[16 - n..]`.
    fn write_hex_to_buffer(self, buffer: &mut [u8; 16]) -> usize;
}
93
94#[inline]
97fn write_unsigned_hex_to_buffer(value: u64, buffer: &mut [u8; 16]) -> usize {
98 if value == 0 {
99 buffer[15] = b'0';
100 return 1;
101 }
102
103 let mut pos = 16;
105 let mut v = value;
106 while v > 0 {
107 pos -= 1;
108 buffer[pos] = HEX_CHARS[(v & 0xf) as usize];
109 v >>= 4;
110 }
111
112 16 - pos
113}
114
115#[inline]
117fn write_signed_hex_to_buffer(value: i64, buffer: &mut [u8; 16]) -> usize {
118 write_unsigned_hex_to_buffer(value as u64, buffer)
120}
121
122impl ToHex for i8 {
123 #[inline]
124 fn write_hex_to_buffer(self, buffer: &mut [u8; 16]) -> usize {
125 write_signed_hex_to_buffer(self as i64, buffer)
126 }
127}
128
129impl ToHex for i16 {
130 #[inline]
131 fn write_hex_to_buffer(self, buffer: &mut [u8; 16]) -> usize {
132 write_signed_hex_to_buffer(self as i64, buffer)
133 }
134}
135
136impl ToHex for i32 {
137 #[inline]
138 fn write_hex_to_buffer(self, buffer: &mut [u8; 16]) -> usize {
139 write_signed_hex_to_buffer(self as i64, buffer)
140 }
141}
142
143impl ToHex for i64 {
144 #[inline]
145 fn write_hex_to_buffer(self, buffer: &mut [u8; 16]) -> usize {
146 write_signed_hex_to_buffer(self, buffer)
147 }
148}
149
150impl ToHex for u8 {
151 #[inline]
152 fn write_hex_to_buffer(self, buffer: &mut [u8; 16]) -> usize {
153 write_unsigned_hex_to_buffer(self as u64, buffer)
154 }
155}
156
157impl ToHex for u16 {
158 #[inline]
159 fn write_hex_to_buffer(self, buffer: &mut [u8; 16]) -> usize {
160 write_unsigned_hex_to_buffer(self as u64, buffer)
161 }
162}
163
164impl ToHex for u32 {
165 #[inline]
166 fn write_hex_to_buffer(self, buffer: &mut [u8; 16]) -> usize {
167 write_unsigned_hex_to_buffer(self as u64, buffer)
168 }
169}
170
171impl ToHex for u64 {
172 #[inline]
173 fn write_hex_to_buffer(self, buffer: &mut [u8; 16]) -> usize {
174 write_unsigned_hex_to_buffer(self, buffer)
175 }
176}
177
178#[user_doc(
179 doc_section(label = "String Functions"),
180 description = "Converts an integer to a hexadecimal string.",
181 syntax_example = "to_hex(int)",
182 sql_example = r#"```sql
183> select to_hex(12345689);
184+-------------------------+
185| to_hex(Int64(12345689)) |
186+-------------------------+
187| bc6159 |
188+-------------------------+
189```"#,
190 standard_argument(name = "int", prefix = "Integer")
191)]
192#[derive(Debug, PartialEq, Eq, Hash)]
193pub struct ToHexFunc {
194 signature: Signature,
195}
196
197impl Default for ToHexFunc {
198 fn default() -> Self {
199 Self::new()
200 }
201}
202
203impl ToHexFunc {
204 pub fn new() -> Self {
205 Self {
206 signature: Signature::coercible(
207 vec![Coercion::new_exact(TypeSignatureClass::Integer)],
208 Volatility::Immutable,
209 ),
210 }
211 }
212}
213
214impl ScalarUDFImpl for ToHexFunc {
215 fn name(&self) -> &str {
216 "to_hex"
217 }
218
219 fn signature(&self) -> &Signature {
220 &self.signature
221 }
222
223 fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
224 Ok(DataType::Utf8)
225 }
226
227 fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
228 let arg = &args.args[0];
229
230 match arg {
231 ColumnarValue::Scalar(ScalarValue::Int64(Some(v))) => Ok(
232 ColumnarValue::Scalar(ScalarValue::Utf8(Some(to_hex_scalar(*v)))),
233 ),
234 ColumnarValue::Scalar(ScalarValue::UInt64(Some(v))) => Ok(
235 ColumnarValue::Scalar(ScalarValue::Utf8(Some(to_hex_scalar(*v)))),
236 ),
237 ColumnarValue::Scalar(ScalarValue::Int32(Some(v))) => Ok(
238 ColumnarValue::Scalar(ScalarValue::Utf8(Some(to_hex_scalar(*v)))),
239 ),
240 ColumnarValue::Scalar(ScalarValue::UInt32(Some(v))) => Ok(
241 ColumnarValue::Scalar(ScalarValue::Utf8(Some(to_hex_scalar(*v)))),
242 ),
243 ColumnarValue::Scalar(ScalarValue::Int16(Some(v))) => Ok(
244 ColumnarValue::Scalar(ScalarValue::Utf8(Some(to_hex_scalar(*v)))),
245 ),
246 ColumnarValue::Scalar(ScalarValue::UInt16(Some(v))) => Ok(
247 ColumnarValue::Scalar(ScalarValue::Utf8(Some(to_hex_scalar(*v)))),
248 ),
249 ColumnarValue::Scalar(ScalarValue::Int8(Some(v))) => Ok(
250 ColumnarValue::Scalar(ScalarValue::Utf8(Some(to_hex_scalar(*v)))),
251 ),
252 ColumnarValue::Scalar(ScalarValue::UInt8(Some(v))) => Ok(
253 ColumnarValue::Scalar(ScalarValue::Utf8(Some(to_hex_scalar(*v)))),
254 ),
255
256 ColumnarValue::Scalar(s) if s.is_null() => {
258 Ok(ColumnarValue::Scalar(ScalarValue::Utf8(None)))
259 }
260
261 ColumnarValue::Array(array) => match array.data_type() {
262 DataType::Int64 => {
263 Ok(ColumnarValue::Array(to_hex_array::<Int64Type>(array)?))
264 }
265 DataType::UInt64 => {
266 Ok(ColumnarValue::Array(to_hex_array::<UInt64Type>(array)?))
267 }
268 DataType::Int32 => {
269 Ok(ColumnarValue::Array(to_hex_array::<Int32Type>(array)?))
270 }
271 DataType::UInt32 => {
272 Ok(ColumnarValue::Array(to_hex_array::<UInt32Type>(array)?))
273 }
274 DataType::Int16 => {
275 Ok(ColumnarValue::Array(to_hex_array::<Int16Type>(array)?))
276 }
277 DataType::UInt16 => {
278 Ok(ColumnarValue::Array(to_hex_array::<UInt16Type>(array)?))
279 }
280 DataType::Int8 => {
281 Ok(ColumnarValue::Array(to_hex_array::<Int8Type>(array)?))
282 }
283 DataType::UInt8 => {
284 Ok(ColumnarValue::Array(to_hex_array::<UInt8Type>(array)?))
285 }
286 other => exec_err!("Unsupported data type {other:?} for function to_hex"),
287 },
288
289 other => internal_err!(
290 "Unexpected argument type {:?} for function to_hex",
291 other.data_type()
292 ),
293 }
294 }
295
296 fn documentation(&self) -> Option<&Documentation> {
297 self.doc()
298 }
299}
300
#[cfg(test)]
mod tests {
    use arrow::array::{
        Int8Array, Int16Array, Int32Array, Int64Array, StringArray, UInt8Array,
        UInt16Array, UInt32Array, UInt64Array,
    };
    use datafusion_common::cast::as_string_array;

    use super::*;

    /// Generates a `#[test]` that runs `to_hex_array` over an input array and
    /// compares the result with the expected strings.
    ///
    /// The three-argument form uses a default data set of `100`, `0` and a
    /// null; the five-argument form takes explicit input and expected values.
    macro_rules! test_to_hex_type {
        ($name:ident, $arrow_type:ty, $array_type:ty, $input:expr, $expected:expr) => {
            #[test]
            fn $name() -> Result<()> {
                let array_ref: ArrayRef = Arc::new(<$array_type>::from($input));
                let hex_result = to_hex_array::<$arrow_type>(&array_ref)?;
                let actual = as_string_array(&hex_result)?;
                let expected_array = StringArray::from($expected);

                assert_eq!(&expected_array, actual);
                Ok(())
            }
        };

        ($name:ident, $arrow_type:ty, $array_type:ty) => {
            test_to_hex_type!(
                $name,
                $arrow_type,
                $array_type,
                vec![Some(100), Some(0), None],
                vec![Some("64"), Some("0"), None]
            );
        };
    }

    // Signed types: negative values are rendered with all 16 hex digits.
    test_to_hex_type!(
        to_hex_int8,
        Int8Type,
        Int8Array,
        vec![Some(100), Some(0), None, Some(-1)],
        vec![Some("64"), Some("0"), None, Some("ffffffffffffffff")]
    );
    test_to_hex_type!(
        to_hex_int16,
        Int16Type,
        Int16Array,
        vec![Some(100), Some(0), None, Some(-1)],
        vec![Some("64"), Some("0"), None, Some("ffffffffffffffff")]
    );
    test_to_hex_type!(
        to_hex_int32,
        Int32Type,
        Int32Array,
        vec![Some(100), Some(0), None, Some(-1)],
        vec![Some("64"), Some("0"), None, Some("ffffffffffffffff")]
    );
    test_to_hex_type!(
        to_hex_int64,
        Int64Type,
        Int64Array,
        vec![Some(100), Some(0), None, Some(-1)],
        vec![Some("64"), Some("0"), None, Some("ffffffffffffffff")]
    );

    // Unsigned types use the default data set.
    test_to_hex_type!(to_hex_uint8, UInt8Type, UInt8Array);
    test_to_hex_type!(to_hex_uint16, UInt16Type, UInt16Array);
    test_to_hex_type!(to_hex_uint32, UInt32Type, UInt32Array);
    test_to_hex_type!(to_hex_uint64, UInt64Type, UInt64Array);

    // Extremes of the 64-bit ranges.
    test_to_hex_type!(
        to_hex_large_signed,
        Int64Type,
        Int64Array,
        vec![Some(i64::MAX), Some(i64::MIN)],
        vec![Some("7fffffffffffffff"), Some("8000000000000000")]
    );

    test_to_hex_type!(
        to_hex_large_unsigned,
        UInt64Type,
        UInt64Array,
        vec![Some(u64::MAX), Some(u64::MIN)],
        vec![Some("ffffffffffffffff"), Some("0")]
    );
}