datafusion_functions/crypto/
md5.rs1use arrow::{array::StringViewArray, datatypes::DataType};
19use datafusion_common::{
20 Result, ScalarValue,
21 cast::as_binary_array,
22 internal_err,
23 types::{logical_binary, logical_string},
24 utils::take_function_args,
25};
26use datafusion_expr::{
27 ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl, Signature,
28 TypeSignature, Volatility,
29};
30use datafusion_expr_common::signature::{Coercion, TypeSignatureClass};
31use datafusion_macros::user_doc;
32use std::sync::Arc;
33
34use crate::crypto::basic::{DigestAlgorithm, digest_process};
35
36#[user_doc(
37 doc_section(label = "Hashing Functions"),
38 description = "Computes an MD5 128-bit checksum for a string expression.",
39 syntax_example = "md5(expression)",
40 sql_example = r#"```sql
41> select md5('foo');
42+----------------------------------+
43| md5(Utf8("foo")) |
44+----------------------------------+
45| acbd18db4cc2f85cedef654fccc4a4d8 |
46+----------------------------------+
47```"#,
48 standard_argument(name = "expression", prefix = "String")
49)]
50#[derive(Debug, PartialEq, Eq, Hash)]
51pub struct Md5Func {
52 signature: Signature,
53}
54
55impl Default for Md5Func {
56 fn default() -> Self {
57 Self::new()
58 }
59}
60
61impl Md5Func {
62 pub fn new() -> Self {
63 Self {
64 signature: Signature::one_of(
65 vec![
66 TypeSignature::Coercible(vec![Coercion::new_exact(
67 TypeSignatureClass::Native(logical_string()),
68 )]),
69 TypeSignature::Coercible(vec![Coercion::new_exact(
70 TypeSignatureClass::Native(logical_binary()),
71 )]),
72 ],
73 Volatility::Immutable,
74 ),
75 }
76 }
77}
78
79impl ScalarUDFImpl for Md5Func {
80 fn name(&self) -> &str {
81 "md5"
82 }
83
84 fn signature(&self) -> &Signature {
85 &self.signature
86 }
87
88 fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
89 Ok(DataType::Utf8View)
90 }
91
92 fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
93 md5(&args.args)
94 }
95
96 fn documentation(&self) -> Option<&Documentation> {
97 self.doc()
98 }
99}
100
/// Lowercase hexadecimal digits, indexed by nibble value (0..=15).
const HEX_CHARS_LOWER: &[u8; 16] = b"0123456789abcdef";

/// Encodes `data` as a lowercase hexadecimal string, two characters per byte.
///
/// The high nibble of each byte is emitted first, so `[0xac, 0x0f]` becomes
/// `"ac0f"`. An empty input yields an empty string.
#[inline]
fn hex_encode(data: impl AsRef<[u8]>) -> String {
    let bytes = data.as_ref();
    // Output length is known up front: exactly two hex digits per input byte.
    let mut s = String::with_capacity(bytes.len() * 2);
    for &b in bytes {
        // Both indices are at most 15, so the table lookups cannot go out of bounds.
        s.push(HEX_CHARS_LOWER[(b >> 4) as usize] as char);
        s.push(HEX_CHARS_LOWER[(b & 0x0f) as usize] as char);
    }
    s
}
116
117fn md5(args: &[ColumnarValue]) -> Result<ColumnarValue> {
118 let [data] = take_function_args("md5", args)?;
119 let value = digest_process(data, DigestAlgorithm::Md5)?;
120
121 Ok(match value {
123 ColumnarValue::Array(array) => {
124 let binary_array = as_binary_array(&array)?;
125 let string_array: StringViewArray =
126 binary_array.iter().map(|opt| opt.map(hex_encode)).collect();
127 ColumnarValue::Array(Arc::new(string_array))
128 }
129 ColumnarValue::Scalar(ScalarValue::Binary(opt)) => {
130 ColumnarValue::Scalar(ScalarValue::Utf8View(opt.map(hex_encode)))
131 }
132 _ => return internal_err!("Impossibly got invalid results from digest"),
133 })
134}