datafusion_execution/
parquet_encryption.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use arrow::datatypes::SchemaRef;
19use async_trait::async_trait;
20use dashmap::DashMap;
21use datafusion_common::config::EncryptionFactoryOptions;
22use datafusion_common::error::Result;
23use datafusion_common::DataFusionError;
24use object_store::path::Path;
25use parquet::encryption::decrypt::FileDecryptionProperties;
26use parquet::encryption::encrypt::FileEncryptionProperties;
27use std::sync::Arc;
28
29/// Trait for types that generate file encryption and decryption properties to
30/// write and read encrypted Parquet files.
31/// This allows flexibility in how encryption keys are managed, for example, to
32/// integrate with a user's key management service (KMS).
33/// For example usage, see the [`parquet_encrypted_with_kms` example].
34///
35/// [`parquet_encrypted_with_kms` example]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/parquet_encrypted_with_kms.rs
36#[async_trait]
37pub trait EncryptionFactory: Send + Sync + std::fmt::Debug + 'static {
38    /// Generate file encryption properties to use when writing a Parquet file.
39    async fn get_file_encryption_properties(
40        &self,
41        config: &EncryptionFactoryOptions,
42        schema: &SchemaRef,
43        file_path: &Path,
44    ) -> Result<Option<FileEncryptionProperties>>;
45
46    /// Generate file decryption properties to use when reading a Parquet file.
47    async fn get_file_decryption_properties(
48        &self,
49        config: &EncryptionFactoryOptions,
50        file_path: &Path,
51    ) -> Result<Option<FileDecryptionProperties>>;
52}
53
54/// Stores [`EncryptionFactory`] implementations that can be retrieved by a unique string identifier
55#[derive(Clone, Debug, Default)]
56pub struct EncryptionFactoryRegistry {
57    factories: DashMap<String, Arc<dyn EncryptionFactory>>,
58}
59
60impl EncryptionFactoryRegistry {
61    /// Register an [`EncryptionFactory`] with an associated identifier that can be later
62    /// used to configure encryption when reading or writing Parquet.
63    /// If an encryption factory with the same identifier was already registered, it is replaced and returned.
64    pub fn register_factory(
65        &self,
66        id: &str,
67        factory: Arc<dyn EncryptionFactory>,
68    ) -> Option<Arc<dyn EncryptionFactory>> {
69        self.factories.insert(id.to_owned(), factory)
70    }
71
72    /// Retrieve an [`EncryptionFactory`] by its identifier
73    pub fn get_factory(&self, id: &str) -> Result<Arc<dyn EncryptionFactory>> {
74        self.factories
75            .get(id)
76            .map(|f| Arc::clone(f.value()))
77            .ok_or_else(|| {
78                DataFusionError::Internal(format!(
79                    "No Parquet encryption factory found for id '{id}'"
80                ))
81            })
82    }
83}