1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
//! Findex is a cryptographic algorithm allowing to securely maintain an encrypted index.
//!
//! It uses a generic Dictionary Encryption Scheme (Dx-Enc) as building block to implement a
//! Multi-Map Encryption Scheme (MM-Enc). A Graph Encryption Scheme (Gx-Enc) is then built on top
//! of the MM-Enc scheme and finally an `Index` trait built on top of this Gx-Enc scheme allows
//! indexing both `Data` and `Keyword`s.
//!
//! The `Index` traits is not a cryptographic one. It is used to simplify the interface and to hide
//! the details of the cryptographic implementation when it is possible.

// Macro declarations should come first.
#[macro_use]
pub mod macros;

mod edx;
mod error;
mod findex_graph;
mod findex_mm;
mod index;
mod parameters;

#[cfg(any(test, feature = "in_memory"))]
pub use edx::in_memory::{InMemoryDb, InMemoryDbError};
pub use edx::{
    chain_table::ChainTable, entry_table::EntryTable, DbInterface, DxEnc, EncryptedValue, Token,
    TokenToEncryptedValueMap, TokenWithEncryptedValueList, Tokens,
};
pub use error::{CoreError, DbInterfaceErrorTrait, Error};
pub use findex_graph::IndexedValue;
pub use findex_mm::{ENTRY_LENGTH, LINK_LENGTH};
pub use index::{
    Data, Findex, Index, IndexedValueToKeywordsMap, Keyword, KeywordToDataMap, Keywords, Label,
    UserKey,
};
pub use parameters::*;

#[cfg(test)]
mod example {
    use std::collections::HashSet;

    use cosmian_crypto_core::{reexport::rand_core::SeedableRng, CsRng, RandomFixedSizeCBytes};

    use crate::{
        ChainTable, Data, DxEnc, EntryTable, Findex, InMemoryDb, Index, IndexedValue,
        IndexedValueToKeywordsMap, Keyword, KeywordToDataMap, Keywords, Label, UserKey,
    };

    #[actix_rt::test]
    async fn index_and_search() {
        /*
         * Findex instantiation.
         */

        let mut rng = CsRng::from_entropy();
        // Let's create a new key for our index.
        let key = UserKey::new(&mut rng);
        // Findex uses a public label with the private key. Let's generate a new label.
        let label = Label::from("My public label");

        // Let's create a new index using the provided Entry and Chain table implementation and the
        // in-memory EDX implementation provided for test purpose.
        let index = Findex::new(
            EntryTable::setup(InMemoryDb::default()),
            ChainTable::setup(InMemoryDb::default()),
        );

        ////////////////////////////////////////////////////////////////////////////////
        //                                                                            //
        //  Let's associate `loc1` to `kwd1`, `loc2` to `kwd2` and `kwd2` to `kwd1`.  //
        //  The future state of the index can be represented as a JSON:               //
        //                                                                            //
        //  ```json                                                                   //
        //  {                                                                         //
        //      'kwd1' : ['loc1', 'kwd2'],                                            //
        //      'kwd2' : ['loc2'],                                                    //
        //  }                                                                         //
        //  ```                                                                       //
        //                                                                            //
        ////////////////////////////////////////////////////////////////////////////////

        let kwd1 = Keyword::from("Keyword 1");
        let kwd2 = Keyword::from("Keyword 2");
        let loc1 = Data::from("Location 1");
        let loc2 = Data::from("Location 2");

        let res = index
            .add(
                &key,
                &label,
                IndexedValueToKeywordsMap::from_iter([
                    (
                        IndexedValue::Data(loc1.clone()),
                        HashSet::from_iter([kwd1.clone()]),
                    ),
                    (
                        IndexedValue::Data(loc2.clone()),
                        HashSet::from_iter([kwd2.clone()]),
                    ),
                    (
                        IndexedValue::Pointer(kwd2.clone()),
                        HashSet::from_iter([kwd1.clone()]),
                    ),
                ]),
            )
            .await
            .expect("Error while indexing additions.");

        // Two new keywords were added to the index.
        assert_eq!(2, res.len());

        let res = index
            .search(
                &key,
                &label,
                Keywords::from_iter([kwd1.clone()]),
                &|_| async { Ok(false) },
            )
            .await
            .expect("Error while searching.");

        // Searching for `kwd1` also retrieves `loc2` since `kwd2` is associated to `kwd1` and that
        // Findex search is recursive.
        assert_eq!(
            res,
            KeywordToDataMap::from_iter([(
                kwd1.clone(),
                HashSet::from_iter([loc1.clone(), loc2.clone()])
            )])
        );

        ////////////////////////////////////////////////////////////////////////////////
        //                                                                            //
        //  Let's delete the association `kwd1`->`kwd2`. This actually associates the //
        //  negation of `kwd2` to `kwd1`.                                             //
        //                                                                            //
        //  ```json                                                                   //
        //  {                                                                         //
        //      'kwd1' : ['loc1', 'kwd2', !'kwd2'],                                   //
        //      'kwd2' : ['loc2'],                                                    //
        //  }                                                                         //
        //  ```                                                                       //
        //                                                                            //
        ////////////////////////////////////////////////////////////////////////////////

        let res = index
            .delete(
                &key,
                &label,
                IndexedValueToKeywordsMap::from_iter([(
                    IndexedValue::Pointer(kwd2.clone()),
                    HashSet::from_iter([kwd1.clone()]),
                )]),
            )
            .await
            .expect("Error while indexing deletions.");

        // No new keyword were added to the index.
        assert_eq!(0, res.len());

        let res = index
            .search(
                &key,
                &label,
                Keywords::from_iter([kwd1.clone()]),
                &|_| async { Ok(false) },
            )
            .await
            .expect("Error while searching.");

        // Searching for `kwd1` no longer retrieves `loc2`.
        assert_eq!(
            res,
            KeywordToDataMap::from_iter([(kwd1, HashSet::from_iter([loc1.clone()]))])
        );

        ////////////////////////////////////////////////////////////////////////////////
        //                                                                            //
        //  Let's compact the index in order to collapse the negation.                //
        //                                                                            //
        //  ```json                                                                   //
        //  {                                                                         //
        //      'kwd1' : ['loc1'],                                                    //
        //      'kwd2' : ['loc2'],                                                    //
        //  }                                                                         //
        //  ```                                                                       //
        //                                                                            //
        ////////////////////////////////////////////////////////////////////////////////

        // Before compacting, the Entry Table holds 2 lines since two keywords were indexed.
        let et_length = index.findex_graph.findex_mm.entry_table.len();
        assert_eq!(2, et_length);

        // Before compacting, the Entry Table holds 3 lines since four associations were indexed
        // but two of them were indexed for the same keyword in the same `add` operations and the
        // indexed values are small enough to hold in the same line.
        let ct_length = index.findex_graph.findex_mm.chain_table.len();
        assert_eq!(3, ct_length);

        let res = index
            .compact(&key, &key, &label, &label, 1., &|res| async { Ok(res) })
            .await;

        // Ooops we forgot to renew either the key or the label!
        assert!(res.is_err());

        // A new label is easier to propagate since this is public information.
        let new_label = Label::from("second label");

        index
            .compact(&key, &key, &label, &new_label, 1f64, &|res| async {
                Ok(res)
            })
            .await
            .unwrap();

        // `new_label` is the new `label`.
        let label = new_label;

        // After compacting, the Entry Table still holds 2 lines since each indexed keyword still
        // holds at least one association.
        let et_length = index.findex_graph.findex_mm.entry_table.len();
        assert_eq!(2, et_length);

        // After compacting, the Chain Table holds 2 lines since the two associations
        // `kwd1`->`kwd2` and `kwd1`->!`kwd2` collapsed.
        let ct_length = index.findex_graph.findex_mm.chain_table.len();
        assert_eq!(2, ct_length);

        ////////////////////////////////////////////////////////////////////////////////
        //                                                                            //
        //  Let's delete the association `loc2`->`kwd2` and compact the index in      //
        //  order to collapse the negation. Since `kwd2` indexes no more keyword,     //
        //  it should be removed from the index:                                      //
        //                                                                            //
        //  ```json                                                                   //
        //  {                                                                         //
        //      'kwd1' : ['loc1'],                                                    //
        //  }                                                                         //
        //  ```                                                                       //
        //                                                                            //
        ////////////////////////////////////////////////////////////////////////////////

        index
            .delete(
                &key,
                &label,
                IndexedValueToKeywordsMap::from_iter([(
                    IndexedValue::Data(loc2),
                    HashSet::from_iter([kwd2.clone()]),
                )]),
            )
            .await
            .expect("Error while indexing deletions.");

        // The Entry Table still holds 2 lines since no more keywords were indexed.
        let et_length = index.findex_graph.findex_mm.entry_table.len();
        assert_eq!(2, et_length);

        // The Chain Table holds 3 lines since a new association was indexed.
        let ct_length = index.findex_graph.findex_mm.chain_table.len();
        assert_eq!(3, ct_length);

        let new_label = Label::from("third label");
        index
            .compact(&key, &key, &label, &new_label, 1f64, &|res| async {
                Ok(res)
            })
            .await
            .unwrap();
        let label = new_label;

        // The Entry Table now holds only 1 line since `kwd2` was not associated to any indexed
        // value anymore.
        let et_length = index.findex_graph.findex_mm.entry_table.len();
        assert_eq!(1, et_length);

        // The Chain Table holds 1 lines since a two associations collapsed.
        let ct_length = index.findex_graph.findex_mm.chain_table.len();
        assert_eq!(1, ct_length);

        ////////////////////////////////////////////////////////////////////////////////
        //                                                                            //
        //  It is possible to filter out indexed values from the index during the     //
        //  compact operation. This is useful when indexed values become obsolete     //
        //  but the index was not updated. A `data_filter` callback can be given to   //
        //  the compact operation. It is fed with the indexed values read during      //
        //  the compact operation. Only those returned are indexed back.              //
        //                                                                            //
        //  In this example, the `loc1` value will be filtered out. The index should  //
        //  then be empty since the `kwd1` will not be associated to any value.       //
        //                                                                            //
        //  ```json                                                                   //
        //  {}                                                                        //
        //  ```                                                                       //
        //                                                                            //
        ////////////////////////////////////////////////////////////////////////////////

        let new_label = Label::from("fourth label");
        index
            .compact(&key, &key, &label, &new_label, 1f64, &|data| async {
                let remaining_data = data.into_iter().filter(|v| v != &loc1).collect();
                Ok(remaining_data)
            })
            .await
            .unwrap();
        let _label = new_label;

        let et_length = index.findex_graph.findex_mm.entry_table.len();
        assert_eq!(0, et_length);

        let ct_length = index.findex_graph.findex_mm.chain_table.len();
        assert_eq!(0, ct_length);
    }
}