1use std::any::Any;
19use std::sync::Arc;
20
21use arrow::array::{Array, ArrayRef, StringArray};
22use arrow::buffer::{Buffer, OffsetBuffer};
23use arrow::datatypes::{
24 ArrowNativeType, ArrowPrimitiveType, DataType, Int8Type, Int16Type, Int32Type,
25 Int64Type, UInt8Type, UInt16Type, UInt32Type, UInt64Type,
26};
27use datafusion_common::cast::as_primitive_array;
28use datafusion_common::{Result, ScalarValue, exec_err, internal_err};
29use datafusion_expr::{
30 Coercion, ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl, Signature,
31 TypeSignatureClass, Volatility,
32};
33use datafusion_macros::user_doc;
34
/// Lowercase hexadecimal digits, indexed by nibble value (`0..=15`).
const HEX_CHARS: &[u8; 16] = b"0123456789abcdef";
37
38fn to_hex_array<T: ArrowPrimitiveType>(array: &ArrayRef) -> Result<ArrayRef>
41where
42 T::Native: ToHex,
43{
44 let integer_array = as_primitive_array::<T>(array)?;
45 let len = integer_array.len();
46
47 let max_hex_len = T::Native::get_byte_width() * 2;
49
50 let mut offsets: Vec<i32> = Vec::with_capacity(len + 1);
52 let mut values: Vec<u8> = Vec::with_capacity(len * max_hex_len);
53
54 let mut hex_buffer = [0u8; 16];
56
57 offsets.push(0);
59
60 for value in integer_array.values() {
63 let hex_len = value.write_hex_to_buffer(&mut hex_buffer);
64 values.extend_from_slice(&hex_buffer[16 - hex_len..]);
65 offsets.push(values.len() as i32);
66 }
67
68 let nulls = integer_array.nulls().cloned();
70
71 let offsets =
74 unsafe { OffsetBuffer::new_unchecked(Buffer::from_vec(offsets).into()) };
75 let result = StringArray::new(offsets, Buffer::from_vec(values), nulls);
76
77 Ok(Arc::new(result) as ArrayRef)
78}
79
80#[inline]
81fn to_hex_scalar<T: ToHex>(value: T) -> String {
82 let mut hex_buffer = [0u8; 16];
83 let hex_len = value.write_hex_to_buffer(&mut hex_buffer);
84 unsafe { std::str::from_utf8_unchecked(&hex_buffer[16 - hex_len..]).to_string() }
86}
87
/// Integer types that can render themselves as lowercase hexadecimal digits.
trait ToHex: ArrowNativeType {
    /// Writes the hexadecimal digits of `self` right-aligned into `buffer`
    /// (the last digit lands at index 15) and returns how many digits were
    /// written. Callers read the digits from `buffer[16 - len..]`; bytes
    /// before that range are not meaningful.
    fn write_hex_to_buffer(self, buffer: &mut [u8; 16]) -> usize;
}
94
95#[inline]
98fn write_unsigned_hex_to_buffer(value: u64, buffer: &mut [u8; 16]) -> usize {
99 if value == 0 {
100 buffer[15] = b'0';
101 return 1;
102 }
103
104 let mut pos = 16;
106 let mut v = value;
107 while v > 0 {
108 pos -= 1;
109 buffer[pos] = HEX_CHARS[(v & 0xf) as usize];
110 v >>= 4;
111 }
112
113 16 - pos
114}
115
116#[inline]
118fn write_signed_hex_to_buffer(value: i64, buffer: &mut [u8; 16]) -> usize {
119 write_unsigned_hex_to_buffer(value as u64, buffer)
121}
122
123impl ToHex for i8 {
124 #[inline]
125 fn write_hex_to_buffer(self, buffer: &mut [u8; 16]) -> usize {
126 write_signed_hex_to_buffer(self as i64, buffer)
127 }
128}
129
130impl ToHex for i16 {
131 #[inline]
132 fn write_hex_to_buffer(self, buffer: &mut [u8; 16]) -> usize {
133 write_signed_hex_to_buffer(self as i64, buffer)
134 }
135}
136
137impl ToHex for i32 {
138 #[inline]
139 fn write_hex_to_buffer(self, buffer: &mut [u8; 16]) -> usize {
140 write_signed_hex_to_buffer(self as i64, buffer)
141 }
142}
143
/// `i64` is already 64 bits wide and is handed to the signed writer as-is.
impl ToHex for i64 {
    #[inline]
    fn write_hex_to_buffer(self, buffer: &mut [u8; 16]) -> usize {
        write_signed_hex_to_buffer(self, buffer)
    }
}
150
151impl ToHex for u8 {
152 #[inline]
153 fn write_hex_to_buffer(self, buffer: &mut [u8; 16]) -> usize {
154 write_unsigned_hex_to_buffer(self as u64, buffer)
155 }
156}
157
158impl ToHex for u16 {
159 #[inline]
160 fn write_hex_to_buffer(self, buffer: &mut [u8; 16]) -> usize {
161 write_unsigned_hex_to_buffer(self as u64, buffer)
162 }
163}
164
165impl ToHex for u32 {
166 #[inline]
167 fn write_hex_to_buffer(self, buffer: &mut [u8; 16]) -> usize {
168 write_unsigned_hex_to_buffer(self as u64, buffer)
169 }
170}
171
/// `u64` is already 64 bits wide and is handed to the unsigned writer as-is.
impl ToHex for u64 {
    #[inline]
    fn write_hex_to_buffer(self, buffer: &mut [u8; 16]) -> usize {
        write_unsigned_hex_to_buffer(self, buffer)
    }
}
178
179#[user_doc(
180 doc_section(label = "String Functions"),
181 description = "Converts an integer to a hexadecimal string.",
182 syntax_example = "to_hex(int)",
183 sql_example = r#"```sql
184> select to_hex(12345689);
185+-------------------------+
186| to_hex(Int64(12345689)) |
187+-------------------------+
188| bc6159 |
189+-------------------------+
190```"#,
191 standard_argument(name = "int", prefix = "Integer")
192)]
193#[derive(Debug, PartialEq, Eq, Hash)]
194pub struct ToHexFunc {
195 signature: Signature,
196}
197
198impl Default for ToHexFunc {
199 fn default() -> Self {
200 Self::new()
201 }
202}
203
204impl ToHexFunc {
205 pub fn new() -> Self {
206 Self {
207 signature: Signature::coercible(
208 vec![Coercion::new_exact(TypeSignatureClass::Integer)],
209 Volatility::Immutable,
210 ),
211 }
212 }
213}
214
215impl ScalarUDFImpl for ToHexFunc {
216 fn as_any(&self) -> &dyn Any {
217 self
218 }
219
220 fn name(&self) -> &str {
221 "to_hex"
222 }
223
224 fn signature(&self) -> &Signature {
225 &self.signature
226 }
227
228 fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
229 Ok(DataType::Utf8)
230 }
231
232 fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
233 let arg = &args.args[0];
234
235 match arg {
236 ColumnarValue::Scalar(ScalarValue::Int64(Some(v))) => Ok(
237 ColumnarValue::Scalar(ScalarValue::Utf8(Some(to_hex_scalar(*v)))),
238 ),
239 ColumnarValue::Scalar(ScalarValue::UInt64(Some(v))) => Ok(
240 ColumnarValue::Scalar(ScalarValue::Utf8(Some(to_hex_scalar(*v)))),
241 ),
242 ColumnarValue::Scalar(ScalarValue::Int32(Some(v))) => Ok(
243 ColumnarValue::Scalar(ScalarValue::Utf8(Some(to_hex_scalar(*v)))),
244 ),
245 ColumnarValue::Scalar(ScalarValue::UInt32(Some(v))) => Ok(
246 ColumnarValue::Scalar(ScalarValue::Utf8(Some(to_hex_scalar(*v)))),
247 ),
248 ColumnarValue::Scalar(ScalarValue::Int16(Some(v))) => Ok(
249 ColumnarValue::Scalar(ScalarValue::Utf8(Some(to_hex_scalar(*v)))),
250 ),
251 ColumnarValue::Scalar(ScalarValue::UInt16(Some(v))) => Ok(
252 ColumnarValue::Scalar(ScalarValue::Utf8(Some(to_hex_scalar(*v)))),
253 ),
254 ColumnarValue::Scalar(ScalarValue::Int8(Some(v))) => Ok(
255 ColumnarValue::Scalar(ScalarValue::Utf8(Some(to_hex_scalar(*v)))),
256 ),
257 ColumnarValue::Scalar(ScalarValue::UInt8(Some(v))) => Ok(
258 ColumnarValue::Scalar(ScalarValue::Utf8(Some(to_hex_scalar(*v)))),
259 ),
260
261 ColumnarValue::Scalar(s) if s.is_null() => {
263 Ok(ColumnarValue::Scalar(ScalarValue::Utf8(None)))
264 }
265
266 ColumnarValue::Array(array) => match array.data_type() {
267 DataType::Int64 => {
268 Ok(ColumnarValue::Array(to_hex_array::<Int64Type>(array)?))
269 }
270 DataType::UInt64 => {
271 Ok(ColumnarValue::Array(to_hex_array::<UInt64Type>(array)?))
272 }
273 DataType::Int32 => {
274 Ok(ColumnarValue::Array(to_hex_array::<Int32Type>(array)?))
275 }
276 DataType::UInt32 => {
277 Ok(ColumnarValue::Array(to_hex_array::<UInt32Type>(array)?))
278 }
279 DataType::Int16 => {
280 Ok(ColumnarValue::Array(to_hex_array::<Int16Type>(array)?))
281 }
282 DataType::UInt16 => {
283 Ok(ColumnarValue::Array(to_hex_array::<UInt16Type>(array)?))
284 }
285 DataType::Int8 => {
286 Ok(ColumnarValue::Array(to_hex_array::<Int8Type>(array)?))
287 }
288 DataType::UInt8 => {
289 Ok(ColumnarValue::Array(to_hex_array::<UInt8Type>(array)?))
290 }
291 other => exec_err!("Unsupported data type {other:?} for function to_hex"),
292 },
293
294 other => internal_err!(
295 "Unexpected argument type {:?} for function to_hex",
296 other.data_type()
297 ),
298 }
299 }
300
301 fn documentation(&self) -> Option<&Documentation> {
302 self.doc()
303 }
304}
305
#[cfg(test)]
mod tests {
    use arrow::array::{
        Int8Array, Int16Array, Int32Array, Int64Array, StringArray, UInt8Array,
        UInt16Array, UInt32Array, UInt64Array,
    };
    use datafusion_common::cast::as_string_array;

    use super::*;

    /// Generates a `#[test]` that runs `to_hex_array` over an input array and
    /// compares the output with the expected strings.
    ///
    /// The three-argument form uses a default fixture (`100`, `0`, null); the
    /// five-argument form takes explicit input and expected vectors.
    macro_rules! test_to_hex_type {
        ($name:ident, $arrow_type:ty, $array_type:ty) => {
            test_to_hex_type!(
                $name,
                $arrow_type,
                $array_type,
                vec![Some(100), Some(0), None],
                vec![Some("64"), Some("0"), None]
            );
        };

        ($name:ident, $arrow_type:ty, $array_type:ty, $input:expr, $expected:expr) => {
            #[test]
            fn $name() -> Result<()> {
                let source: ArrayRef = Arc::new(<$array_type>::from($input));
                let wanted = StringArray::from($expected);

                let converted = to_hex_array::<$arrow_type>(&source)?;
                let actual = as_string_array(&converted)?;

                assert_eq!(&wanted, actual);
                Ok(())
            }
        };
    }

    // Signed types: negative values are sign-extended to 64 bits, so `-1`
    // renders as sixteen `f` digits regardless of the source width.
    test_to_hex_type!(
        to_hex_int8,
        Int8Type,
        Int8Array,
        vec![Some(100), Some(0), None, Some(-1)],
        vec![Some("64"), Some("0"), None, Some("ffffffffffffffff")]
    );
    test_to_hex_type!(
        to_hex_int16,
        Int16Type,
        Int16Array,
        vec![Some(100), Some(0), None, Some(-1)],
        vec![Some("64"), Some("0"), None, Some("ffffffffffffffff")]
    );
    test_to_hex_type!(
        to_hex_int32,
        Int32Type,
        Int32Array,
        vec![Some(100), Some(0), None, Some(-1)],
        vec![Some("64"), Some("0"), None, Some("ffffffffffffffff")]
    );
    test_to_hex_type!(
        to_hex_int64,
        Int64Type,
        Int64Array,
        vec![Some(100), Some(0), None, Some(-1)],
        vec![Some("64"), Some("0"), None, Some("ffffffffffffffff")]
    );

    // Unsigned types share the default fixture.
    test_to_hex_type!(to_hex_uint8, UInt8Type, UInt8Array);
    test_to_hex_type!(to_hex_uint16, UInt16Type, UInt16Array);
    test_to_hex_type!(to_hex_uint32, UInt32Type, UInt32Array);
    test_to_hex_type!(to_hex_uint64, UInt64Type, UInt64Array);

    // Extremes of the 64-bit ranges.
    test_to_hex_type!(
        to_hex_large_signed,
        Int64Type,
        Int64Array,
        vec![Some(i64::MAX), Some(i64::MIN)],
        vec![Some("7fffffffffffffff"), Some("8000000000000000")]
    );

    test_to_hex_type!(
        to_hex_large_unsigned,
        UInt64Type,
        UInt64Array,
        vec![Some(u64::MAX), Some(u64::MIN)],
        vec![Some("ffffffffffffffff"), Some("0")]
    );
}