datafusion_functions/crypto/
digest.rs1use crate::crypto::basic::{DigestAlgorithm, digest_process};
19
20use arrow::datatypes::DataType;
21use datafusion_common::{
22 Result, exec_err, not_impl_err,
23 types::{logical_binary, logical_string},
24 utils::take_function_args,
25};
26use datafusion_expr::{
27 ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl, Signature,
28 TypeSignature, Volatility,
29};
30use datafusion_expr_common::signature::{Coercion, TypeSignatureClass};
31use datafusion_macros::user_doc;
32
33#[user_doc(
34 doc_section(label = "Hashing Functions"),
35 description = "Computes the binary hash of an expression using the specified algorithm.",
36 syntax_example = "digest(expression, algorithm)",
37 sql_example = r#"```sql
38> select digest('foo', 'sha256');
39+------------------------------------------------------------------+
40| digest(Utf8("foo"),Utf8("sha256")) |
41+------------------------------------------------------------------+
42| 2c26b46b68ffc68ff99b453c1d30413413422d706483bfa0f98a5e886266e7ae |
43+------------------------------------------------------------------+
44```"#,
45 standard_argument(name = "expression", prefix = "String"),
46 argument(
47 name = "algorithm",
48 description = "String expression specifying algorithm to use. Must be one of:
49 - md5
50 - sha224
51 - sha256
52 - sha384
53 - sha512
54 - blake2s
55 - blake2b
56 - blake3"
57 )
58)]
59#[derive(Debug, PartialEq, Eq, Hash)]
60pub struct DigestFunc {
61 signature: Signature,
62}
63
64impl Default for DigestFunc {
65 fn default() -> Self {
66 Self::new()
67 }
68}
69
70impl DigestFunc {
71 pub fn new() -> Self {
72 Self {
73 signature: Signature::one_of(
74 vec![
75 TypeSignature::Coercible(vec![
76 Coercion::new_exact(TypeSignatureClass::Native(logical_string())),
77 Coercion::new_exact(TypeSignatureClass::Native(logical_string())),
78 ]),
79 TypeSignature::Coercible(vec![
80 Coercion::new_exact(TypeSignatureClass::Native(logical_binary())),
81 Coercion::new_exact(TypeSignatureClass::Native(logical_string())),
82 ]),
83 ],
84 Volatility::Immutable,
85 ),
86 }
87 }
88}
89
90impl ScalarUDFImpl for DigestFunc {
91 fn name(&self) -> &str {
92 "digest"
93 }
94
95 fn signature(&self) -> &Signature {
96 &self.signature
97 }
98
99 fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
100 Ok(DataType::Binary)
101 }
102
103 fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
104 let [data, digest_algorithm] = take_function_args(self.name(), &args.args)?;
105 digest(data, digest_algorithm)
106 }
107
108 fn documentation(&self) -> Option<&Documentation> {
109 self.doc()
110 }
111}
112
113fn digest(
117 data: &ColumnarValue,
118 digest_algorithm: &ColumnarValue,
119) -> Result<ColumnarValue> {
120 let digest_algorithm = match digest_algorithm {
121 ColumnarValue::Scalar(scalar) => match scalar.try_as_str() {
122 Some(Some(method)) => method.parse::<DigestAlgorithm>(),
123 _ => exec_err!("Unsupported data type {scalar:?} for function digest"),
124 },
125 ColumnarValue::Array(_) => {
126 not_impl_err!("Digest using dynamically decided method is not yet supported")
127 }
128 }?;
129 digest_process(data, digest_algorithm)
130}