Skip to main content

ironoxide/
search.rs

1//! Blind Index Search API
2//!
3//! This is a technique that allows you to hide the terms that have been indexed. This particular implementation uses tri-grams, which
4//! are salted and hashed to produce the list of tokens.
5//!
6//! ## BlindIndexSearch
7//!
8//! The BlindIndexSearch gives the ability to generate queries as well as create the search entries to store.
9//!
10//! # Optional
11//! This requires the optional `beta` feature to be enabled.
12
13#[cfg(feature = "blocking")]
14use crate::blocking::BlockingIronOxide;
15
16use crate::{
17    IronOxide, IronOxideErr, Result,
18    document::{
19        DocumentEncryptOpts,
20        advanced::{DocumentAdvancedOps, DocumentEncryptUnmanagedResult},
21    },
22    group::GroupId,
23    internal::take_lock,
24};
25use futures::Future;
26pub use ironcore_search_helpers::transliterate_string;
27use ironcore_search_helpers::{
28    generate_hashes_for_string, generate_hashes_for_string_with_padding,
29};
30use rand::{
31    self, RngCore, SeedableRng,
32    rngs::{OsRng, adapter::ReseedingRng},
33};
34use rand_chacha::ChaChaCore;
35use serde::{Deserialize, Serialize};
36use std::{
37    collections::HashSet,
38    convert::{TryFrom, TryInto},
39    ops::DerefMut,
40    sync::Mutex,
41};
42
/// Exact number of bytes a decrypted blind index salt must contain.
const REQUIRED_LEN: usize = 32;
/// Number of bytes that can be read from `BlindIndexSearch.rng` before it is reseeded (1 MiB).
const BYTES_BEFORE_RESEEDING: u64 = 1 << 20;
47
48#[derive(Debug, PartialEq, Clone, Hash, Eq, Serialize, Deserialize)]
49#[serde(rename_all = "camelCase")]
50pub struct EncryptedBlindIndexSalt {
51    pub encrypted_deks: Vec<u8>,
52    pub encrypted_salt_bytes: Vec<u8>,
53}
54
55impl EncryptedBlindIndexSalt {
56    //encrypt the blind index salt and give back the BlindIndexSearch object.
57    pub async fn initialize_search(&self, ironoxide: &IronOxide) -> Result<BlindIndexSearch> {
58        let decrypted_value = ironoxide
59            .document_decrypt_unmanaged(&self.encrypted_salt_bytes[..], &self.encrypted_deks[..])
60            .await?;
61        decrypted_value.decrypted_data().try_into()
62    }
63
64    #[cfg(feature = "blocking")]
65    pub fn initialize_search_blocking(&self, bio: &BlockingIronOxide) -> Result<BlindIndexSearch> {
66        bio.runtime.block_on(self.initialize_search(&bio.ironoxide))
67    }
68}
69
70///Trait which gives the ability to create a blind index.
71pub trait BlindIndexSearchInitialize {
72    ///Create an index and encrypt it to the provided group_id.
73    fn create_blind_index(
74        &self,
75        group_id: &GroupId,
76    ) -> impl Future<Output = Result<EncryptedBlindIndexSalt>> + Send;
77}
78
79impl BlindIndexSearchInitialize for IronOxide {
80    async fn create_blind_index(&self, group_id: &GroupId) -> Result<EncryptedBlindIndexSalt> {
81        let salt = {
82            let mut mut_salt = [0u8; 32];
83            take_lock(&self.rng).deref_mut().fill_bytes(&mut mut_salt);
84            mut_salt
85        };
86
87        let encrypted_salt = self
88            .document_encrypt_unmanaged(
89                salt.into(),
90                &DocumentEncryptOpts::with_explicit_grants(
91                    None,
92                    None,
93                    false,
94                    vec![group_id.into()],
95                ),
96            )
97            .await?;
98        encrypted_salt.try_into()
99    }
100}
101
102#[derive(Debug)]
103pub struct BlindIndexSearch {
104    decrypted_salt: [u8; 32],
105    rng: Mutex<ReseedingRng<ChaChaCore, OsRng>>,
106}
107
108impl TryFrom<&[u8]> for BlindIndexSearch {
109    type Error = IronOxideErr;
110    fn try_from(bytes: &[u8]) -> Result<BlindIndexSearch> {
111        let decrypted_len = bytes.len();
112        if decrypted_len != REQUIRED_LEN {
113            Err(IronOxideErr::WrongSizeError(
114                Some(decrypted_len),
115                Some(REQUIRED_LEN),
116            ))
117        } else {
118            let mut a = [0u8; 32];
119            a.copy_from_slice(&bytes[0..32]);
120            Ok(BlindIndexSearch::new(a))
121        }
122    }
123}
124
125impl TryFrom<DocumentEncryptUnmanagedResult> for EncryptedBlindIndexSalt {
126    type Error = IronOxideErr;
127    fn try_from(r: DocumentEncryptUnmanagedResult) -> Result<EncryptedBlindIndexSalt> {
128        match r.access_errs().get(0) {
129            None => Ok(EncryptedBlindIndexSalt {
130                encrypted_deks: r.encrypted_deks().to_vec(),
131                encrypted_salt_bytes: r.encrypted_data().to_vec(),
132            }),
133            Some(err) => Err(IronOxideErr::UserOrGroupDoesNotExist(
134                err.user_or_group.clone(),
135            )),
136        }
137    }
138}
139
140impl BlindIndexSearch {
141    fn new(decrypted_salt: [u8; 32]) -> BlindIndexSearch {
142        let rng = Mutex::new(ReseedingRng::new(
143            rand_chacha::ChaChaCore::from_entropy(),
144            BYTES_BEFORE_RESEEDING,
145            OsRng::default(),
146        ));
147        BlindIndexSearch {
148            decrypted_salt,
149            rng,
150        }
151    }
152
153    /// Generate the list of tokens to use to find entries that match the search query, given the specified partition_id.
154    /// query - The string you want to tokenize and hash
155    /// partition_id - An extra string you want to include in every hash, this allows 2 queries with different partition_ids to produce a different set of tokens for the same query
156    pub fn tokenize_query(&self, query: &str, partition_id: Option<&str>) -> Result<HashSet<u32>> {
157        generate_hashes_for_string(query, partition_id, &self.decrypted_salt[..])
158            .map_err(|message| IronOxideErr::ValidationError("query".to_string(), message))
159    }
160
161    /// Generate the list of tokens to create a search entry for `data`. This function will also return some random values in the HashSet, which will make
162    /// it harder for someone to know what the input was. Because of this, calling this function will not be the same as `tokenize_query`, but `tokenize_query` will always
163    /// return a subset of the values returned by `tokenize_data`.
164    ///
165    /// data - The string you want to tokenize and hash
166    /// partition_id - An extra string you want to include in every hash, this allows 2 queries with different partition_ids to produce a different set of tokens for the same data
167    pub fn tokenize_data(&self, data: &str, partition_id: Option<&str>) -> Result<HashSet<u32>> {
168        generate_hashes_for_string_with_padding(
169            data,
170            partition_id,
171            &self.decrypted_salt[..],
172            &self.rng,
173        )
174        .map_err(|message| IronOxideErr::ValidationError("data".to_string(), message))
175    }
176}
177
#[cfg(test)]
mod tests {
    use super::*;
    use galvanic_assert::*;

    /// A 32-byte slice is accepted as a salt.
    #[test]
    fn try_from_works_for_correct_size() -> Result<()> {
        let salt = [0u8; 32];
        BlindIndexSearch::try_from(&salt[..])?;
        Ok(())
    }

    /// Any other length is rejected with `WrongSizeError`.
    #[test]
    fn try_from_errors_for_incorrect_size() -> Result<()> {
        let oversized = [0u8; 100];
        let error = BlindIndexSearch::try_from(&oversized[..]).unwrap_err();
        assert_that!(&error, is_variant!(IronOxideErr::WrongSizeError));
        Ok(())
    }
}