main/searchableencryption/
basic_searchable_encryption.rs

1// Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
2// SPDX-License-Identifier: Apache-2.0
3
4use crate::test_utils;
5use aws_sdk_dynamodb::types::AttributeValue;
6use std::collections::HashMap;
7
8use aws_db_esdk::material_providers::client;
9use aws_db_esdk::material_providers::types::material_providers_config::MaterialProvidersConfig;
10use aws_db_esdk::CryptoAction;
11
12use aws_db_esdk::dynamodb::types::BeaconKeySource;
13use aws_db_esdk::dynamodb::types::BeaconVersion;
14use aws_db_esdk::dynamodb::types::DynamoDbTableEncryptionConfig;
15use aws_db_esdk::dynamodb::types::SearchConfig;
16use aws_db_esdk::dynamodb::types::SingleKeyStore;
17use aws_db_esdk::dynamodb::types::StandardBeacon;
18use aws_db_esdk::intercept::DbEsdkInterceptor;
19use aws_db_esdk::key_store::client as keystore_client;
20use aws_db_esdk::key_store::types::key_store_config::KeyStoreConfig;
21use aws_db_esdk::key_store::types::KmsConfiguration;
22use aws_db_esdk::types::dynamo_db_tables_encryption_config::DynamoDbTablesEncryptionConfig;
23
24/*
25 This example demonstrates how to set up a beacon on an encrypted attribute,
26 put an item with the beacon, and query against that beacon.
27 This example follows a use case of a database that stores unit inspection information.
28
29 Running this example requires access to a DDB table with the
30 following key configuration:
31   - Partition key is named "work_id" with type (S)
32   - Sort key is named "inspection_date" with type (S)
33 This table must have a Global Secondary Index (GSI) configured named "last4-unit-index":
34   - Partition key is named "aws_dbe_b_inspector_id_last4" with type (S)
35   - Sort key is named "aws_dbe_b_unit" with type (S)
36
37 In this example for storing unit inspection information, this schema is utilized for the data:
38  - "work_id" stores a unique identifier for a unit inspection work order (v4 UUID)
39  - "inspection_date" stores an ISO 8601 date for the inspection (YYYY-MM-DD)
40  - "inspector_id_last4" stores the last 4 digits of the ID of the inspector performing the work
41  - "unit" stores a 12-digit serial number for the unit being inspected
42
 The example takes one input as a function argument:
   - Branch key ID for a branch key that was previously created in your key store. See the
     CreateKeyStoreKeyExample.
 The remaining inputs are read from constants in `test_utils`:
   1. DDB table name for table to put/query data from
   2. Branch key wrapping KMS key ARN for the KMS key used to create the branch key with the
      branch key ID provided above
   3. Branch key DDB table name for the DDB table representing the branch key store
50*/
51
52const GSI_NAME: &str = "last4-unit-index";
53
54pub async fn put_and_query_with_beacon(branch_key_id: &str) -> Result<(), crate::BoxError> {
55    // The whole thing is wrapped in a future to ensure that everything is Send and Sync
56    let future = async move {
57        let ddb_table_name = test_utils::UNIT_INSPECTION_TEST_DDB_TABLE_NAME;
58        let branch_key_wrapping_kms_key_arn = test_utils::TEST_BRANCH_KEY_WRAPPING_KMS_KEY_ARN;
59        let branch_key_ddb_table_name = test_utils::TEST_BRANCH_KEYSTORE_DDB_TABLE_NAME;
60
61        // 1. Configure Beacons.
62        //    The beacon name must be the name of a table attribute that will be encrypted.
63        //    The `length` parameter dictates how many bits are in the beacon attribute value.
64        //    The following link provides guidance on choosing a beacon length:
65        //        https://docs.aws.amazon.com/database-encryption-sdk/latest/devguide/choosing-beacon-length.html
66
67        // The configured DDB table has a GSI on the `aws_dbe_b_inspector_id_last4` AttributeName.
68        // This field holds the last 4 digits of an inspector ID.
69        // For our example, this field may range from 0 to 9,999 (10,000 possible values).
70        // For our example, we assume a full inspector ID is an integer
71        //     ranging from 0 to 99,999,999. We do not assume that the full inspector ID's
72        //     values are uniformly distributed across its range of possible values.
73        //     In many use cases, the prefix of an identifier encodes some information
74        //     about that identifier (e.g. zipcode and SSN prefixes encode geographic
75        //     information), while the suffix does not and is more uniformly distributed.
76        //     We will assume that the inspector ID field matches a similar use case.
77        //     So for this example, we only store and use the last
78        //     4 digits of the inspector ID, which we assume is uniformly distributed.
79        // Since the full ID's range is divisible by the range of the last 4 digits,
80        //     then the last 4 digits of the inspector ID are uniformly distributed
81        //     over the range from 0 to 9,999.
82        // See our documentation for why you should avoid creating beacons over non-uniform distributions
83        //  https://docs.aws.amazon.com/database-encryption-sdk/latest/devguide/searchable-encryption.html#are-beacons-right-for-me
84        // A single inspector ID suffix may be assigned to multiple `work_id`s.
85        //
86        // This link provides guidance for choosing a beacon length:
87        //    https://docs.aws.amazon.com/database-encryption-sdk/latest/devguide/choosing-beacon-length.html
88        // We follow the guidance in the link above to determine reasonable bounds
89        // for the length of a beacon on the last 4 digits of an inspector ID:
90        //  - min: log(sqrt(10,000))/log(2) ~= 6.6, round up to 7
91        //  - max: log((10,000/2))/log(2) ~= 12.3, round down to 12
92        // You will somehow need to round results to a nearby integer.
93        // We choose to round to the nearest integer; you might consider a different rounding approach.
94        // Rounding up will return fewer expected "false positives" in queries,
95        //    leading to fewer decrypt calls and better performance,
96        //    but it is easier to identify which beacon values encode distinct plaintexts.
97        // Rounding down will return more expected "false positives" in queries,
98        //    leading to more decrypt calls and worse performance,
99        //    but it is harder to identify which beacon values encode distinct plaintexts.
100        // We can choose a beacon length between 7 and 12:
101        //  - Closer to 7, we expect more "false positives" to be returned,
102        //    making it harder to identify which beacon values encode distinct plaintexts,
103        //    but leading to more decrypt calls and worse performance
104        //  - Closer to 12, we expect fewer "false positives" returned in queries,
105        //    leading to fewer decrypt calls and better performance,
106        //    but it is easier to identify which beacon values encode distinct plaintexts.
107        // As an example, we will choose 10.
108        //
109        // Values stored in aws_dbe_b_inspector_id_last4 will be 10 bits long (0x000 - 0x3ff)
110        // There will be 2^10 = 1,024 possible HMAC values.
111        // With a sufficiently large number of well-distributed inspector IDs,
112        //    for a particular beacon we expect (10,000/1,024) ~= 9.8 4-digit inspector ID suffixes
113        //    sharing that beacon value.
114        let last4_beacon = StandardBeacon::builder()
115            .name("inspector_id_last4")
116            .length(10)
117            .build()?;
118
119        // The configured DDB table has a GSI on the `aws_dbe_b_unit` AttributeName.
120        // This field holds a unit serial number.
121        // For this example, this is a 12-digit integer from 0 to 999,999,999,999 (10^12 possible values).
122        // We will assume values for this attribute are uniformly distributed across this range.
123        // A single unit serial number may be assigned to multiple `work_id`s.
124        //
125        // This link provides guidance for choosing a beacon length:
126        //    https://docs.aws.amazon.com/database-encryption-sdk/latest/devguide/choosing-beacon-length.html
127        // We follow the guidance in the link above to determine reasonable bounds
128        // for the length of a beacon on a unit serial number:
129        //  - min: log(sqrt(999,999,999,999))/log(2) ~= 19.9, round up to 20
130        //  - max: log((999,999,999,999/2))/log(2) ~= 38.9, round up to 39
131        // We can choose a beacon length between 20 and 39:
132        //  - Closer to 20, we expect more "false positives" to be returned,
133        //    making it harder to identify which beacon values encode distinct plaintexts,
134        //    but leading to more decrypt calls and worse performance
135        //  - Closer to 39, we expect fewer "false positives" returned in queries,
136        //    leading to fewer decrypt calls and better performance,
137        //    but it is easier to identify which beacon values encode distinct plaintexts.
138        // As an example, we will choose 30.
139        //
140        // Values stored in aws_dbe_b_unit will be 30 bits long (0x00000000 - 0x3fffffff)
141        // There will be 2^30 = 1,073,741,824 ~= 1.1B possible HMAC values.
142        // With a sufficiently large number of well-distributed inspector IDs,
143        //    for a particular beacon we expect (10^12/2^30) ~= 931.3 unit serial numbers
144        //    sharing that beacon value.
145        let unit_beacon = StandardBeacon::builder().name("unit").length(30).build()?;
146
147        let standard_beacon_list = vec![last4_beacon, unit_beacon];
148
149        // 2. Configure Keystore.
150        //    The keystore is a separate DDB table where the client stores encryption and decryption materials.
151        //    In order to configure beacons on the DDB client, you must configure a keystore.
152        //
153        //    This example expects that you have already set up a KeyStore with a single branch key.
154        //    See the "Create KeyStore Table Example" and "Create KeyStore Key Example" for how to do this.
155        //    After you create a branch key, you should persist its ID for use in this example.
156        let sdk_config = aws_config::load_defaults(aws_config::BehaviorVersion::latest()).await;
157        let key_store_config = KeyStoreConfig::builder()
158            .kms_client(aws_sdk_kms::Client::new(&sdk_config))
159            .ddb_client(aws_sdk_dynamodb::Client::new(&sdk_config))
160            .ddb_table_name(branch_key_ddb_table_name)
161            .logical_key_store_name(branch_key_ddb_table_name)
162            .kms_configuration(KmsConfiguration::KmsKeyArn(
163                branch_key_wrapping_kms_key_arn.to_string(),
164            ))
165            .build()?;
166
167        let key_store = keystore_client::Client::from_conf(key_store_config)?;
168
169        // 3. Create BeaconVersion.
170        //    The BeaconVersion inside the list holds the list of beacons on the table.
171        //    The BeaconVersion also stores information about the keystore.
172        //    BeaconVersion must be provided:
173        //      - keyStore: The keystore configured in step 2.
174        //      - keySource: A configuration for the key source.
175        //        For simple use cases, we can configure a 'singleKeySource' which
176        //        statically configures a single beaconKey. That is the approach this example takes.
177        //        For use cases where you want to use different beacon keys depending on the data
178        //        (for example if your table holds data for multiple tenants, and you want to use
179        //        a different beacon key per tenant), look into configuring a MultiKeyStore:
180        //          https://docs.aws.amazon.com/database-encryption-sdk/latest/devguide/searchable-encryption-multitenant.html
181
182        let beacon_version = BeaconVersion::builder()
183            .standard_beacons(standard_beacon_list)
184            .version(1) // MUST be 1
185            .key_store(key_store.clone())
186            .key_source(BeaconKeySource::Single(
187                SingleKeyStore::builder()
188                    // `keyId` references a beacon key.
189                    // For every branch key we create in the keystore,
190                    // we also create a beacon key.
191                    // This beacon key is not the same as the branch key,
192                    // but is created with the same ID as the branch key.
193                    .key_id(branch_key_id)
194                    .cache_ttl(6000)
195                    .build()?,
196            ))
197            .build()?;
198        let beacon_versions = vec![beacon_version];
199
200        // 4. Create a Hierarchical Keyring
201        //    This is a KMS keyring that utilizes the keystore table.
202        //    This config defines how items are encrypted and decrypted.
203        //    NOTE: You should configure this to use the same keystore as your search config.
204        let provider_config = MaterialProvidersConfig::builder().build()?;
205        let mat_prov = client::Client::from_conf(provider_config)?;
206        let kms_keyring = mat_prov
207            .create_aws_kms_hierarchical_keyring()
208            .branch_key_id(branch_key_id)
209            .key_store(key_store)
210            .ttl_seconds(6000)
211            .send()
212            .await?;
213
214        // 5. Configure which attributes are encrypted and/or signed when writing new items.
215        //    For each attribute that may exist on the items we plan to write to our DynamoDbTable,
216        //    we must explicitly configure how they should be treated during item encryption:
217        //      - ENCRYPT_AND_SIGN: The attribute is encrypted and included in the signature
218        //      - SIGN_ONLY: The attribute not encrypted, but is still included in the signature
219        //      - DO_NOTHING: The attribute is not encrypted and not included in the signature
220        //    Any attributes that will be used in beacons must be configured as ENCRYPT_AND_SIGN.
221        let attribute_actions_on_encrypt = HashMap::from([
222            ("work_id".to_string(), CryptoAction::SignOnly), // Our partition attribute must be SIGN_ONLY
223            ("inspection_date".to_string(), CryptoAction::SignOnly), // Our sort attribute must be SIGN_ONLY
224            (
225                "inspector_id_last4".to_string(),
226                CryptoAction::EncryptAndSign,
227            ), // Beaconized attributes must be encrypted
228            ("unit".to_string(), CryptoAction::EncryptAndSign), // Beaconized attributes must be encrypted
229        ]);
230
231        // 6. Create the DynamoDb Encryption configuration for the table we will be writing to.
232        //    The beaconVersions are added to the search configuration.
233        let table_config = DynamoDbTableEncryptionConfig::builder()
234            .logical_table_name(ddb_table_name)
235            .partition_key_name("work_id")
236            .sort_key_name("inspection_date")
237            .attribute_actions_on_encrypt(attribute_actions_on_encrypt)
238            .keyring(kms_keyring)
239            .search(
240                SearchConfig::builder()
241                    .write_version(1) // MUST be 1
242                    .versions(beacon_versions)
243                    .build()?,
244            )
245            .build()?;
246
247        let table_configs = DynamoDbTablesEncryptionConfig::builder()
248            .table_encryption_configs(HashMap::from([(ddb_table_name.to_string(), table_config)]))
249            .build()?;
250
251        // 7. Create a new AWS SDK DynamoDb client using the TableEncryptionConfigs
252        let sdk_config = aws_config::load_defaults(aws_config::BehaviorVersion::latest()).await;
253        let dynamo_config = aws_sdk_dynamodb::config::Builder::from(&sdk_config)
254            .interceptor(DbEsdkInterceptor::new(table_configs)?)
255            .build();
256        let ddb = aws_sdk_dynamodb::Client::from_conf(dynamo_config);
257
258        // 8. Put an item into our table using the above client.
259        //    Before the item gets sent to DynamoDb, it will be encrypted
260        //        client-side, according to our configuration.
261        //    Since our configuration includes beacons for `inspector_id_last4` and `unit`,
262        //        the client will add two additional attributes to the item. These attributes will have names
263        //        `aws_dbe_b_inspector_id_last4` and `aws_dbe_b_unit`. Their values will be HMACs
264        //        truncated to as many bits as the beacon's `length` parameter; e.g.
265        //    aws_dbe_b_inspector_id_last4 = truncate(HMAC("4321"), 10)
266        //    aws_dbe_b_unit = truncate(HMAC("123456789012"), 30)
267
268        let item = HashMap::from([
269            (
270                "work_id".to_string(),
271                AttributeValue::S("1313ba89-5661-41eb-ba6c-cb1b4cb67b2d".to_string()),
272            ),
273            (
274                "inspection_date".to_string(),
275                AttributeValue::S("2023-06-13".to_string()),
276            ),
277            (
278                "inspector_id_last4".to_string(),
279                AttributeValue::S("4321".to_string()),
280            ),
281            (
282                "unit".to_string(),
283                AttributeValue::S("123456789012".to_string()),
284            ),
285        ]);
286
287        ddb.put_item()
288            .table_name(ddb_table_name)
289            .set_item(Some(item.clone()))
290            .send()
291            .await?;
292
293        // 9. Query for the item we just put.
294        //     Note that we are constructing the query as if we were querying on plaintext values.
295        //     However, the DDB encryption client will detect that this attribute name has a beacon configured.
296        //     The client will add the beaconized attribute name and attribute value to the query,
297        //         and transform the query to use the beaconized name and value.
298        //     Internally, the client will query for and receive all items with a matching HMAC value in the beacon field.
299        //     This may include a number of "false positives" with different ciphertext, but the same truncated HMAC.
300        //     e.g. if truncate(HMAC("123456789012"), 30)
301        //          == truncate(HMAC("098765432109"), 30),
302        //     the query will return both items.
303        //     The client will decrypt all returned items to determine which ones have the expected attribute values,
304        //         and only surface items with the correct plaintext to the user.
305        //     This procedure is internal to the client and is abstracted away from the user;
306        //     e.g. the user will only see "123456789012" and never
307        //        "098765432109", though the actual query returned both.
308        let expression_attributes_names = HashMap::from([
309            ("#last4".to_string(), "inspector_id_last4".to_string()),
310            ("#unit".to_string(), "unit".to_string()),
311        ]);
312
313        let expression_attribute_values = HashMap::from([
314            (":last4".to_string(), AttributeValue::S("4321".to_string())),
315            (
316                ":unit".to_string(),
317                AttributeValue::S("123456789012".to_string()),
318            ),
319        ]);
320
321        // GSIs do not update instantly
322        // so if the results come back empty
323        // we retry after a short sleep
324        for _i in 0..10 {
325            let query_response = ddb
326                .query()
327                .table_name(ddb_table_name)
328                .index_name(GSI_NAME)
329                .key_condition_expression("#last4 = :last4 and #unit = :unit")
330                .set_expression_attribute_names(Some(expression_attributes_names.clone()))
331                .set_expression_attribute_values(Some(expression_attribute_values.clone()))
332                .send()
333                .await?;
334
335            // if no results, sleep and try again
336            if query_response.items.is_none() || query_response.items.as_ref().unwrap().is_empty() {
337                std::thread::sleep(std::time::Duration::from_millis(20));
338                continue;
339            }
340
341            let attribute_values = query_response.items.unwrap();
342            // Validate only 1 item was returned: the item we just put
343            assert_eq!(attribute_values.len(), 1);
344            let returned_item = &attribute_values[0];
345            // Validate the item has the expected attributes
346            assert_eq!(
347                returned_item["inspector_id_last4"],
348                AttributeValue::S("4321".to_string())
349            );
350            assert_eq!(
351                returned_item["unit"],
352                AttributeValue::S("123456789012".to_string())
353            );
354            break;
355        }
356        println!("basic_searchable_encryption successful.");
357        Ok(())
358    };
359    future.await
360}