main/searchableencryption/
virtual_beacon_searchable_encryption.rs

1// Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
2// SPDX-License-Identifier: Apache-2.0
3
4use crate::test_utils;
5use aws_db_esdk::client as transform_client;
6use aws_db_esdk::dynamodb::types::BeaconKeySource;
7use aws_db_esdk::dynamodb::types::BeaconVersion;
8use aws_db_esdk::dynamodb::types::DynamoDbTableEncryptionConfig;
9use aws_db_esdk::dynamodb::types::GetPrefix;
10use aws_db_esdk::dynamodb::types::SearchConfig;
11use aws_db_esdk::dynamodb::types::SingleKeyStore;
12use aws_db_esdk::dynamodb::types::StandardBeacon;
13use aws_db_esdk::dynamodb::types::VirtualField;
14use aws_db_esdk::dynamodb::types::VirtualPart;
15use aws_db_esdk::dynamodb::types::VirtualTransform;
16use aws_db_esdk::intercept::DbEsdkInterceptor;
17use aws_db_esdk::key_store::client as keystore_client;
18use aws_db_esdk::key_store::types::key_store_config::KeyStoreConfig;
19use aws_db_esdk::key_store::types::KmsConfiguration;
20use aws_db_esdk::material_providers::client as mpl_client;
21use aws_db_esdk::material_providers::types::material_providers_config::MaterialProvidersConfig;
22use aws_db_esdk::CryptoAction;
23use aws_db_esdk::DynamoDbTablesEncryptionConfig;
24use aws_sdk_dynamodb::types::AttributeValue;
25use std::collections::HashMap;
26
27/*
28 This example demonstrates how to set up a virtual field from two DDB
29 attributes, create a standard beacon with that field, put an item with
30 that beacon, and query against that beacon.
31
32 A virtual field is a field consisting of a transformation of one or more attributes in a DDB item.
33 Virtual fields are useful in querying against encrypted fields that only have a handful of
34 possible values. They allow you to take fields with few possible values, concatenate
35 them to other fields, then query against the combined field. This enables using these types of
36 fields in queries while making it infeasible to identify which beacon values encode
37 the few possible distinct plaintexts. This is explained in more detail below.
38 Virtual fields are not stored in the DDB table. However, they are used to construct
39 a beacon, the value of which is stored.
40
41 For more information on virtual fields, see
42   https://docs.aws.amazon.com/database-encryption-sdk/latest/devguide/beacons.html#virtual-field
43
44 For our example, we will construct a virtual field
45 from two DDB attributes `state` and `hasTestResult` as `state`+prefix(`hasTestResult`, 1).
46 We will then create a beacon out of this virtual field and use it to search.
47
48 This example follows a use case of a database that stores customer test result metadata.
49 Records are indexed by `customer_id` and store a `state` attribute, representing the
50 US state or territory where the customer lives, and a `hasTestResult` boolean attribute,
51 representing whether the customer has a "test result" available. (Maybe this represents
52 some medical test result, and this table stores "result available" metadata.) We assume
53 that values in these fields are uniformly distributed across all possible values for
54 these fields (56 for `state`, 2 for `hasTestResult`), and are uniformly distributed across
55 customer IDs.
56
57 The motivation behind this example is to demonstrate how and why one would use a virtual beacon.
58 In this example, our table stores records with an encrypted boolean `hasTestResult` attribute.
59 We would like to be able to query for customers in a given state with a `true` hasTestResult
60 attribute.
61
62 To be able to execute this query securely and efficiently, we want the following
63 properties on our table:
64  1. Hide the distribution of `hasTestResult` attribute values (i.e. it should be infeasible
65     to determine the percentage of `true`s to `false`s across the dataset from beaconized
66     values)
67  2. Query against a combination of whether `hasTestResult` is true/false and the `state` field
68 We cannot achieve these properties with a standard beacon on a true/false attribute. Following
69 the guidance to choose a beacon length:
70   https://docs.aws.amazon.com/database-encryption-sdk/latest/devguide/choosing-beacon-length.html
71 For a boolean value (in our case, whether `hasTestResult` is true or false), the acceptable
72 bounds for beacon length are either 0 or 1. This corresponds to either not storing a beacon
73 (length 0), or effectively storing another boolean attribute (length 1). With
74 length 0, this beacon is useless for searching (violating property 2); with length 1, this
75 beacon may not hide the attribute (violating property 1).
76 In addition, choosing a longer beacon length does not help us.
77 Each attribute value is mapped to a distinct beacon.
78 Since booleans only have 2 possible attribute values, we will still only have 2 possible
79 beacon values, though those values may be longer. A longer beacon provides no advantages over
 a beacon of length 1 in this situation.
81
82 A compound beacon also does not help.
83 To (over)simplify, a compound beacon is a concatenation of standard beacons,
84 i.e. beacon(`state`)+beacon(`hasTestResult`).
85 The `hasTestResult` beacon is still visible, so we would still have the problems above.
86
87 To achieve these properties, we instead construct a virtual field and use that in our beacon,
88 i.e. beacon(`state`+`hasTestResult`). Assuming these fields are well-distributed across
89 customer IDs and possible values, this gives us both desired properties; we can query against
90 both attributes while hiding information from the underlying data. This is demonstrated in more
91 detail below.
92
 Running this example requires access to a DDB table with the
94 following primary key configuration:
95   - Partition key is named "customer_id" with type (S)
96   - Sort key is named "create_time" with type (S)
97 This table must have a Global Secondary Index (GSI) configured named "stateAndHasTestResult-index":
98   - Partition key is named "aws_dbe_b_stateAndHasTestResult" with type (S)
99
100 In this example for storing customer location data, this schema is utilized for the data:
101  - "customer_id" stores a unique customer identifier
102  - "create_time" stores a Unix timestamp
103  - "state" stores an encrypted 2-letter US state or territory abbreviation
104        (https://www.faa.gov/air_traffic/publications/atpubs/cnt_html/appendix_a.html)
105  - "hasTestResult" is not part of the schema, but is an attribute utilized in this example.
106     It stores a boolean attribute (false/true) indicating whether this customer has a test result
107     available.
108
109 The example requires the following ordered input command line parameters:
110   1. DDB table name for table to put/query data from
111   2. Branch key ID for a branch key that was previously created in your key store. See the
112      CreateKeyStoreKeyExample.
   3. Branch key wrapping KMS key ARN for the KMS key used to create the branch key
   4. Branch key DDB table name for the DDB table representing the branch key store
115*/
116
/// Name of the Global Secondary Index that the example queries.
/// The header comment above requires this index to exist on the table with
/// partition key `aws_dbe_b_stateAndHasTestResult` (the stored beacon attribute).
const GSI_NAME: &str = "stateAndHasTestResult-index";
118
119pub async fn put_and_query_with_beacon(branch_key_id: &str) -> Result<(), crate::BoxError> {
120    let ddb_table_name = test_utils::SIMPLE_BEACON_TEST_DDB_TABLE_NAME;
121    let branch_key_wrapping_kms_key_arn = test_utils::TEST_BRANCH_KEY_WRAPPING_KMS_KEY_ARN;
122    let branch_key_ddb_table_name = test_utils::TEST_BRANCH_KEYSTORE_DDB_TABLE_NAME;
123
124    // 1. Construct a length-1 prefix virtual transform.
125    //    `hasTestResult` is a binary attribute, containing either `true` or `false`.
126    //    As an example to demonstrate virtual transforms, we will truncate the value
127    //    of `hasTestResult` in the virtual field to the length-1 prefix of the binary value, i.e.:
128    //     - "true" -> "t"
129    //     - "false -> "f"
130    //    This is not necessary. This is done as a demonstration of virtual transforms.
131    //    Virtual transform operations treat all attributes as strings
132    //    (i.e. the boolean value `true` is interpreted as a string "true"),
133    //    so its length-1 prefix is just "t".
134
135    let length1_prefix_virtual_transform_list = vec![VirtualTransform::Prefix(
136        GetPrefix::builder().length(1).build()?,
137    )];
138
139    // 2. Construct the VirtualParts required for the VirtualField
140    let has_test_result_part = VirtualPart::builder()
141        .loc("hasTestResult")
142        .trans(length1_prefix_virtual_transform_list)
143        .build()?;
144
145    let state_part = VirtualPart::builder().loc("state").build()?;
146    // Note that we do not apply any transform to the `state` attribute,
147    // and the virtual field will read in the attribute as-is.
148
149    // 3. Construct the VirtualField from the VirtualParts
150    //    Note that the order that virtual parts are added to the virtualPartList
151    //    dictates the order in which they are concatenated to build the virtual field.
152    //    You must add virtual parts in the same order on write as you do on read.
153    let virtual_part_list = vec![state_part, has_test_result_part];
154
155    let state_and_has_test_result_field = VirtualField::builder()
156        .name("stateAndHasTestResult")
157        .parts(virtual_part_list)
158        .build()?;
159
160    let virtual_field_list = vec![state_and_has_test_result_field];
161
162    // 4. Configure our beacon.
163    //    The virtual field is assumed to hold a US 2-letter state abbreviation
164    //    (56 possible values = 50 states + 6 territories) concatenated with a binary attribute
165    //    (2 possible values: true/false hasTestResult field), we expect a population size of
166    //    56 * 2 = 112 possible values.
167    //    We will also assume that these values are reasonably well-distributed across
168    //    customer IDs. In practice, this will not be true. We would expect
169    //    more populous states to appear more frequently in the database.
170    //    A more complex analysis would show that a stricter upper bound
171    //    is necessary to account for this by hiding information from the
172    //    underlying distribution.
173    //
174    //    This link provides guidance for choosing a beacon length:
175    //       https://docs.aws.amazon.com/database-encryption-sdk/latest/devguide/choosing-beacon-length.html
176    //    We follow the guidance in the link above to determine reasonable bounds for beacon length:
177    //     - min: log(sqrt(112))/log(2) ~= 3.4, round down to 3
178    //     - max: log((112/2))/log(2) ~= 5.8, round up to 6
179    //    You will somehow need to round results to a nearby integer.
180    //    We choose to round to the nearest integer; you might consider a different rounding approach.
181    //    Rounding up will return fewer expected "false positives" in queries,
182    //       leading to fewer decrypt calls and better performance,
183    //       but it is easier to identify which beacon values encode distinct plaintexts.
184    //    Rounding down will return more expected "false positives" in queries,
185    //       leading to more decrypt calls and worse performance,
186    //       but it is harder to identify which beacon values encode distinct plaintexts.
187    //    We can choose a beacon length between 3 and 6:
188    //     - Closer to 3, we expect more "false positives" to be returned,
189    //       making it harder to identify which beacon values encode distinct plaintexts,
190    //       but leading to more decrypt calls and worse performance
191    //     - Closer to 6, we expect fewer "false positives" returned in queries,
192    //       leading to fewer decrypt calls and better performance,
193    //       but it is easier to identify which beacon values encode distinct plaintexts.
194    //    As an example, we will choose 5.
195    //    Values stored in aws_dbe_b_stateAndHasTestResult will be 5 bits long (0x00 - 0x1f)
196    //    There will be 2^5 = 32 possible HMAC values.
197    //    With a well-distributed dataset (112 values), for a particular beacon we expect
198    //    (112/32) = 3.5 combinations of abbreviation + true/false attribute
199    //    sharing that beacon value.
200    let standard_beacon_list = vec![StandardBeacon::builder()
201        .name("stateAndHasTestResult")
202        .length(5)
203        .build()?];
204
205    // 5. Configure Keystore.
206    //    This example expects that you have already set up a KeyStore with a single branch key.
207    //    See the "CreateKeyStoreTableExample" and "CreateKeyStoreKeyExample" files for how to do this.
208    //    After you create a branch key, you should persist its ID for use in this example.
209    let sdk_config = aws_config::load_defaults(aws_config::BehaviorVersion::latest()).await;
210    let key_store_config = KeyStoreConfig::builder()
211        .kms_client(aws_sdk_kms::Client::new(&sdk_config))
212        .ddb_client(aws_sdk_dynamodb::Client::new(&sdk_config))
213        .ddb_table_name(branch_key_ddb_table_name)
214        .logical_key_store_name(branch_key_ddb_table_name)
215        .kms_configuration(KmsConfiguration::KmsKeyArn(
216            branch_key_wrapping_kms_key_arn.to_string(),
217        ))
218        .build()?;
219
220    let key_store = keystore_client::Client::from_conf(key_store_config)?;
221
222    // 6. Create BeaconVersion.
223    //    The BeaconVersion inside the list holds the list of beacons on the table.
224    //    The BeaconVersion also stores information about the keystore.
225    //    BeaconVersion must be provided:
226    //      - keyStore: The keystore configured in the previous step.
227    //      - keySource: A configuration for the key source.
228    //        For simple use cases, we can configure a 'singleKeySource' which
229    //        statically configures a single beaconKey. That is the approach this example takes.
230    //        For use cases where you want to use different beacon keys depending on the data
231    //        (for example if your table holds data for multiple tenants, and you want to use
232    //        a different beacon key per tenant), look into configuring a MultiKeyStore:
233    //          https://docs.aws.amazon.com/database-encryption-sdk/latest/devguide/searchable-encryption-multitenant.html
234    //    We also provide our standard beacon list and virtual fields here.
235    let beacon_version = BeaconVersion::builder()
236        .standard_beacons(standard_beacon_list)
237        .virtual_fields(virtual_field_list)
238        .version(1) // MUST be 1
239        .key_store(key_store.clone())
240        .key_source(BeaconKeySource::Single(
241            SingleKeyStore::builder()
242                // `keyId` references a beacon key.
243                // For every branch key we create in the keystore,
244                // we also create a beacon key.
245                // This beacon key is not the same as the branch key,
246                // but is created with the same ID as the branch key.
247                .key_id(branch_key_id)
248                .cache_ttl(6000)
249                .build()?,
250        ))
251        .build()?;
252    let beacon_versions = vec![beacon_version];
253
254    // 7. Create a Hierarchical Keyring
255    //    This is a KMS keyring that utilizes the keystore table.
256    //    This config defines how items are encrypted and decrypted.
257    //    NOTE: You should configure this to use the same keystore as your search config.
258    let mpl_config = MaterialProvidersConfig::builder().build()?;
259    let mpl = mpl_client::Client::from_conf(mpl_config)?;
260    let kms_keyring = mpl
261        .create_aws_kms_hierarchical_keyring()
262        .branch_key_id(branch_key_id)
263        .key_store(key_store)
264        .ttl_seconds(6000)
265        .send()
266        .await?;
267
268    // 8. Configure which attributes are encrypted and/or signed when writing new items.
269    //    For each attribute that may exist on the items we plan to write to our DynamoDbTable,
270    //    we must explicitly configure how they should be treated during item encryption:
271    //      - ENCRYPT_AND_SIGN: The attribute is encrypted and included in the signature
272    //      - SIGN_ONLY: The attribute not encrypted, but is still included in the signature
273    //      - DO_NOTHING: The attribute is not encrypted and not included in the signature
274    //    Any attributes that will be used in beacons must be configured as ENCRYPT_AND_SIGN.
275    let attribute_actions_on_encrypt = HashMap::from([
276        ("customer_id".to_string(), CryptoAction::SignOnly), // Our partition attribute must be SIGN_ONLY
277        ("create_time".to_string(), CryptoAction::SignOnly), // Our sort attribute must be SIGN_ONLY
278        ("state".to_string(), CryptoAction::EncryptAndSign), // Beaconized attributes must be encrypted
279        ("hasTestResult".to_string(), CryptoAction::EncryptAndSign), // Beaconized attributes must be encrypted
280    ]);
281
282    // 9. Create the DynamoDb Encryption configuration for the table we will be writing to.
283    //    The beaconVersions are added to the search configuration.
284    let table_config = DynamoDbTableEncryptionConfig::builder()
285        .logical_table_name(ddb_table_name)
286        .partition_key_name("customer_id")
287        .sort_key_name("create_time")
288        .attribute_actions_on_encrypt(attribute_actions_on_encrypt)
289        .keyring(kms_keyring)
290        .search(
291            SearchConfig::builder()
292                .write_version(1) // MUST be 1
293                .versions(beacon_versions)
294                .build()?,
295        )
296        .build()?;
297
298    // 10. Create config
299    let encryption_config = DynamoDbTablesEncryptionConfig::builder()
300        .table_encryption_configs(HashMap::from([(ddb_table_name.to_string(), table_config)]))
301        .build()?;
302
303    // 11. Create test items
304
305    // Create item with hasTestResult=true
306    let item_with_has_test_result = HashMap::from([
307        (
308            "customer_id".to_string(),
309            AttributeValue::S("ABC-123".to_string()),
310        ),
311        (
312            "create_time".to_string(),
313            AttributeValue::N("1681495205".to_string()),
314        ),
315        ("state".to_string(), AttributeValue::S("CA".to_string())),
316        ("hasTestResult".to_string(), AttributeValue::Bool(true)),
317    ]);
318
319    // Create item with hasTestResult=false
320    let item_with_no_has_test_result = HashMap::from([
321        (
322            "customer_id".to_string(),
323            AttributeValue::S("DEF-456".to_string()),
324        ),
325        (
326            "create_time".to_string(),
327            AttributeValue::N("1681495205".to_string()),
328        ),
329        ("state".to_string(), AttributeValue::S("CA".to_string())),
330        ("hasTestResult".to_string(), AttributeValue::Bool(false)),
331    ]);
332
333    // 12. If developing or debugging, verify config by checking virtual field values directly
334    let trans = transform_client::Client::from_conf(encryption_config.clone())?;
335    let resolve_output = trans
336        .resolve_attributes()
337        .table_name(ddb_table_name)
338        .item(item_with_has_test_result.clone())
339        .version(1)
340        .send()
341        .await?;
342
343    // CompoundBeacons is empty because we have no Compound Beacons configured
344    assert_eq!(resolve_output.compound_beacons.unwrap().len(), 0);
345
346    // Verify that VirtualFields has the expected value
347    let virtual_fields = resolve_output.virtual_fields.unwrap();
348    assert_eq!(virtual_fields.len(), 1);
349    assert_eq!(virtual_fields["stateAndHasTestResult"], "CAt");
350
351    // 13. Create a new AWS SDK DynamoDb client using the DynamoDb Encryption Interceptor above
352    let dynamo_config = aws_sdk_dynamodb::config::Builder::from(&sdk_config)
353        .interceptor(DbEsdkInterceptor::new(encryption_config)?)
354        .build();
355    let ddb = aws_sdk_dynamodb::Client::from_conf(dynamo_config);
356
357    // 14. Put two items into our table using the above client.
358    //     The two items will differ only in their `customer_id` attribute (primary key)
359    //         and their `hasTestResult` attribute.
360    //     We will query against these items to demonstrate how to use our setup above
361    //         to query against our `stateAndHasTestResult` beacon.
362    //     Before the item gets sent to DynamoDb, it will be encrypted
363    //         client-side, according to our configuration.
364    //     Since our configuration includes a beacon on a virtual field named
365    //         `stateAndHasTestResult`, the client will add an attribute
366    //         to the item with name `aws_dbe_b_stateAndHasTestResult`.
367    //         Its value will be an HMAC truncated to as many bits as the
368    //         beacon's `length` parameter; i.e. 5.
369
370    ddb.put_item()
371        .table_name(ddb_table_name)
372        .set_item(Some(item_with_has_test_result.clone()))
373        .send()
374        .await?;
375
376    ddb.put_item()
377        .table_name(ddb_table_name)
378        .set_item(Some(item_with_no_has_test_result.clone()))
379        .send()
380        .await?;
381
382    // 15. Query by stateAndHasTestResult attribute.
383    //     Note that we are constructing the query as if we were querying on plaintext values.
384    //     However, the DDB encryption client will detect that this attribute name has a beacon configured.
385    //     The client will add the beaconized attribute name and attribute value to the query,
386    //         and transform the query to use the beaconized name and value.
387    //     Internally, the client will query for and receive all items with a matching HMAC value in the beacon field.
388    //     This may include a number of "false positives" with different ciphertext, but the same truncated HMAC.
389    //     e.g. if truncate(HMAC("CAt"), 5) == truncate(HMAC("DCf"), 5), the query will return both items.
390    //     The client will decrypt all returned items to determine which ones have the expected attribute values,
391    //         and only surface items with the correct plaintext to the user.
392    //     This procedure is internal to the client and is abstracted away from the user;
393    //     e.g. the user will only see "CAt" and never "DCf", though the actual query returned both.
394    let expression_attribute_values = HashMap::from([
395        // We are querying for the item with `state`="CA" and `hasTestResult`=`true`.
396        // Since we added virtual parts as `state` then `hasTestResult`,
397        //     we must write our query expression in the same order.
398        // We constructed our virtual field as `state`+`hasTestResult`,
399        //     so we add the two parts in that order.
400        // Since we also created a virtual transform that truncated `hasTestResult`
401        //     to its length-1 prefix, i.e. "true" -> "t",
402        //     we write that field as its length-1 prefix in the query.
403        (
404            ":stateAndHasTestResult".to_string(),
405            AttributeValue::S("CAt".to_string()),
406        ),
407    ]);
408
409    // GSIs are sometimes a little bit delayed, so we retry if the query comes up empty.
410    for _i in 0..10 {
411        let query_response = ddb
412            .query()
413            .table_name(ddb_table_name)
414            .index_name(GSI_NAME)
415            .key_condition_expression("stateAndHasTestResult = :stateAndHasTestResult")
416            .set_expression_attribute_values(Some(expression_attribute_values.clone()))
417            .send()
418            .await?;
419
420        // if no results, sleep and try again
421        if query_response.items.is_none() || query_response.items.as_ref().unwrap().is_empty() {
422            std::thread::sleep(std::time::Duration::from_millis(20));
423            continue;
424        }
425
426        let attribute_values = query_response.items.unwrap();
427        // Validate only 1 item was returned: the item we just put
428        assert_eq!(attribute_values.len(), 1);
429        let returned_item = &attribute_values[0];
430        // Validate the item has the expected attributes
431        assert_eq!(returned_item["state"], AttributeValue::S("CA".to_string()));
432        assert_eq!(returned_item["hasTestResult"], AttributeValue::Bool(true));
433        break;
434    }
435    println!("virtual_beacon_searchable_encryption successful.");
436    Ok(())
437}