Skip to main content

datafusion_functions/crypto/
digest.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::crypto::basic::{DigestAlgorithm, digest_process};
19
20use arrow::datatypes::DataType;
21use datafusion_common::{
22    Result, exec_err, not_impl_err,
23    types::{logical_binary, logical_string},
24    utils::take_function_args,
25};
26use datafusion_expr::{
27    ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl, Signature,
28    TypeSignature, Volatility,
29};
30use datafusion_expr_common::signature::{Coercion, TypeSignatureClass};
31use datafusion_macros::user_doc;
32
33#[user_doc(
34    doc_section(label = "Hashing Functions"),
35    description = "Computes the binary hash of an expression using the specified algorithm.",
36    syntax_example = "digest(expression, algorithm)",
37    sql_example = r#"```sql
38> select digest('foo', 'sha256');
39+------------------------------------------------------------------+
40| digest(Utf8("foo"),Utf8("sha256"))                               |
41+------------------------------------------------------------------+
42| 2c26b46b68ffc68ff99b453c1d30413413422d706483bfa0f98a5e886266e7ae |
43+------------------------------------------------------------------+
44```"#,
45    standard_argument(name = "expression", prefix = "String"),
46    argument(
47        name = "algorithm",
48        description = "String expression specifying algorithm to use. Must be one of:
49    - md5
50    - sha224
51    - sha256
52    - sha384
53    - sha512
54    - blake2s
55    - blake2b
56    - blake3"
57    )
58)]
59#[derive(Debug, PartialEq, Eq, Hash)]
60pub struct DigestFunc {
61    signature: Signature,
62}
63
64impl Default for DigestFunc {
65    fn default() -> Self {
66        Self::new()
67    }
68}
69
70impl DigestFunc {
71    pub fn new() -> Self {
72        Self {
73            signature: Signature::one_of(
74                vec![
75                    TypeSignature::Coercible(vec![
76                        Coercion::new_exact(TypeSignatureClass::Native(logical_string())),
77                        Coercion::new_exact(TypeSignatureClass::Native(logical_string())),
78                    ]),
79                    TypeSignature::Coercible(vec![
80                        Coercion::new_exact(TypeSignatureClass::Native(logical_binary())),
81                        Coercion::new_exact(TypeSignatureClass::Native(logical_string())),
82                    ]),
83                ],
84                Volatility::Immutable,
85            ),
86        }
87    }
88}
89
90impl ScalarUDFImpl for DigestFunc {
91    fn name(&self) -> &str {
92        "digest"
93    }
94
95    fn signature(&self) -> &Signature {
96        &self.signature
97    }
98
99    fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
100        Ok(DataType::Binary)
101    }
102
103    fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
104        let [data, digest_algorithm] = take_function_args(self.name(), &args.args)?;
105        digest(data, digest_algorithm)
106    }
107
108    fn documentation(&self) -> Option<&Documentation> {
109        self.doc()
110    }
111}
112
113/// Compute binary hash of the given `data` (String or Binary array), according
114/// to the specified `digest_algorithm`. See [`DigestAlgorithm`] for supported
115/// algorithms.
116fn digest(
117    data: &ColumnarValue,
118    digest_algorithm: &ColumnarValue,
119) -> Result<ColumnarValue> {
120    let digest_algorithm = match digest_algorithm {
121        ColumnarValue::Scalar(scalar) => match scalar.try_as_str() {
122            Some(Some(method)) => method.parse::<DigestAlgorithm>(),
123            _ => exec_err!("Unsupported data type {scalar:?} for function digest"),
124        },
125        ColumnarValue::Array(_) => {
126            not_impl_err!("Digest using dynamically decided method is not yet supported")
127        }
128    }?;
129    digest_process(data, digest_algorithm)
130}