datafusion_execution/parquet_encryption.rs
1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18use arrow::datatypes::SchemaRef;
19use async_trait::async_trait;
20use dashmap::DashMap;
21use datafusion_common::config::EncryptionFactoryOptions;
22use datafusion_common::error::Result;
23use datafusion_common::DataFusionError;
24use object_store::path::Path;
25use parquet::encryption::decrypt::FileDecryptionProperties;
26use parquet::encryption::encrypt::FileEncryptionProperties;
27use std::sync::Arc;
28
/// Trait for types that generate file encryption and decryption properties to
/// write and read encrypted Parquet files.
///
/// This allows flexibility in how encryption keys are managed, for example, to
/// integrate with a user's key management service (KMS).
/// For example usage, see the [`parquet_encrypted_with_kms` example].
///
/// Implementors must be `Send + Sync + 'static` and implement
/// [`std::fmt::Debug`]. Instances are shared as `Arc<dyn EncryptionFactory>`
/// through an [`EncryptionFactoryRegistry`].
///
/// [`parquet_encrypted_with_kms` example]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/parquet_encrypted_with_kms.rs
#[async_trait]
pub trait EncryptionFactory: Send + Sync + std::fmt::Debug + 'static {
    /// Generate file encryption properties to use when writing a Parquet file.
    ///
    /// # Arguments
    ///
    /// * `config` - Options passed through to this encryption factory.
    /// * `schema` - Arrow schema supplied for the file being written.
    /// * `file_path` - Object store path supplied for the file being written.
    ///
    /// # Returns
    ///
    /// Optional [`FileEncryptionProperties`], or an error if they could not be
    /// generated.
    /// NOTE(review): `Ok(None)` presumably means the file is written without
    /// encryption; confirm against the callers of this method.
    async fn get_file_encryption_properties(
        &self,
        config: &EncryptionFactoryOptions,
        schema: &SchemaRef,
        file_path: &Path,
    ) -> Result<Option<FileEncryptionProperties>>;

    /// Generate file decryption properties to use when reading a Parquet file.
    ///
    /// Unlike the encryption counterpart, no schema is supplied here.
    ///
    /// # Arguments
    ///
    /// * `config` - Options passed through to this encryption factory.
    /// * `file_path` - Object store path supplied for the file being read.
    ///
    /// # Returns
    ///
    /// Optional [`FileDecryptionProperties`], or an error if they could not be
    /// generated.
    /// NOTE(review): `Ok(None)` presumably means the file is read without
    /// decryption; confirm against the callers of this method.
    async fn get_file_decryption_properties(
        &self,
        config: &EncryptionFactoryOptions,
        file_path: &Path,
    ) -> Result<Option<FileDecryptionProperties>>;
}
53
/// Stores [`EncryptionFactory`] implementations that can be retrieved by a unique string identifier.
///
/// Factories are held as `Arc<dyn EncryptionFactory>` values in a [`DashMap`]
/// keyed by their identifier.
#[derive(Clone, Debug, Default)]
pub struct EncryptionFactoryRegistry {
    /// Registered factories, keyed by the identifier they were registered under.
    factories: DashMap<String, Arc<dyn EncryptionFactory>>,
}
59
60impl EncryptionFactoryRegistry {
61 /// Register an [`EncryptionFactory`] with an associated identifier that can be later
62 /// used to configure encryption when reading or writing Parquet.
63 /// If an encryption factory with the same identifier was already registered, it is replaced and returned.
64 pub fn register_factory(
65 &self,
66 id: &str,
67 factory: Arc<dyn EncryptionFactory>,
68 ) -> Option<Arc<dyn EncryptionFactory>> {
69 self.factories.insert(id.to_owned(), factory)
70 }
71
72 /// Retrieve an [`EncryptionFactory`] by its identifier
73 pub fn get_factory(&self, id: &str) -> Result<Arc<dyn EncryptionFactory>> {
74 self.factories
75 .get(id)
76 .map(|f| Arc::clone(f.value()))
77 .ok_or_else(|| {
78 DataFusionError::Internal(format!(
79 "No Parquet encryption factory found for id '{id}'"
80 ))
81 })
82 }
83}