datafusion_functions/crypto/
digest.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use crate::crypto::basic::{DigestAlgorithm, digest_process};
19
20use arrow::datatypes::DataType;
21use datafusion_common::{
22    Result, exec_err, not_impl_err,
23    types::{logical_binary, logical_string},
24    utils::take_function_args,
25};
26use datafusion_expr::{
27    ColumnarValue, Documentation, ScalarFunctionArgs, ScalarUDFImpl, Signature,
28    TypeSignature, Volatility,
29};
30use datafusion_expr_common::signature::{Coercion, TypeSignatureClass};
31use datafusion_macros::user_doc;
32use std::any::Any;
33
34#[user_doc(
35    doc_section(label = "Hashing Functions"),
36    description = "Computes the binary hash of an expression using the specified algorithm.",
37    syntax_example = "digest(expression, algorithm)",
38    sql_example = r#"```sql
39> select digest('foo', 'sha256');
40+------------------------------------------------------------------+
41| digest(Utf8("foo"),Utf8("sha256"))                               |
42+------------------------------------------------------------------+
43| 2c26b46b68ffc68ff99b453c1d30413413422d706483bfa0f98a5e886266e7ae |
44+------------------------------------------------------------------+
45```"#,
46    standard_argument(name = "expression", prefix = "String"),
47    argument(
48        name = "algorithm",
49        description = "String expression specifying algorithm to use. Must be one of:
50    - md5
51    - sha224
52    - sha256
53    - sha384
54    - sha512
55    - blake2s
56    - blake2b
57    - blake3"
58    )
59)]
60#[derive(Debug, PartialEq, Eq, Hash)]
61pub struct DigestFunc {
62    signature: Signature,
63}
64
65impl Default for DigestFunc {
66    fn default() -> Self {
67        Self::new()
68    }
69}
70
71impl DigestFunc {
72    pub fn new() -> Self {
73        Self {
74            signature: Signature::one_of(
75                vec![
76                    TypeSignature::Coercible(vec![
77                        Coercion::new_exact(TypeSignatureClass::Native(logical_string())),
78                        Coercion::new_exact(TypeSignatureClass::Native(logical_string())),
79                    ]),
80                    TypeSignature::Coercible(vec![
81                        Coercion::new_exact(TypeSignatureClass::Native(logical_binary())),
82                        Coercion::new_exact(TypeSignatureClass::Native(logical_string())),
83                    ]),
84                ],
85                Volatility::Immutable,
86            ),
87        }
88    }
89}
90
91impl ScalarUDFImpl for DigestFunc {
92    fn as_any(&self) -> &dyn Any {
93        self
94    }
95
96    fn name(&self) -> &str {
97        "digest"
98    }
99
100    fn signature(&self) -> &Signature {
101        &self.signature
102    }
103
104    fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
105        Ok(DataType::Binary)
106    }
107
108    fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
109        let [data, digest_algorithm] = take_function_args(self.name(), &args.args)?;
110        digest(data, digest_algorithm)
111    }
112
113    fn documentation(&self) -> Option<&Documentation> {
114        self.doc()
115    }
116}
117
118/// Compute binary hash of the given `data` (String or Binary array), according
119/// to the specified `digest_algorithm`. See [`DigestAlgorithm`] for supported
120/// algorithms.
121fn digest(
122    data: &ColumnarValue,
123    digest_algorithm: &ColumnarValue,
124) -> Result<ColumnarValue> {
125    let digest_algorithm = match digest_algorithm {
126        ColumnarValue::Scalar(scalar) => match scalar.try_as_str() {
127            Some(Some(method)) => method.parse::<DigestAlgorithm>(),
128            _ => exec_err!("Unsupported data type {scalar:?} for function digest"),
129        },
130        ColumnarValue::Array(_) => {
131            not_impl_err!("Digest using dynamically decided method is not yet supported")
132        }
133    }?;
134    digest_process(data, digest_algorithm)
135}