1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
//! Contains structs for indexing a record based on a single [`Mapping`]

use super::{
    errors::{IndexerError, LoadIndexerError},
    mapping::{Mapping, MappingWithMeta},
    vector::{EncryptedTerm, TermVector, VectorTerm},
};
use crate::record::{Record, Value};

use hex_literal::hex;
use ore_encoding_rs::{siphash, OrePlaintext};
use ore_rs::{scheme::bit2::OREAES128, ORECipher, OREEncrypt};

/// Hash every string term with siphash and encrypt the result with ORE for match queries
///
/// Each encrypted term is linked back to `record_id`, and the resulting vector is tagged with
/// `index_id`. Returns `Ok(None)` when `terms` is empty.
///
/// # Errors
///
/// Returns an [`IndexerError`] as soon as encrypting any single term fails.
fn encrypt_string_terms(
    ore: &mut OREAES128,
    index_id: &[u8; 16],
    record_id: &[u8; 16],
    terms: Vec<String>,
) -> Result<Option<TermVector>, IndexerError> {
    // With no string terms there is nothing worth inserting into the database, so report that
    // to the caller instead of building an empty vector.
    if terms.is_empty() {
        return Ok(None);
    }

    let mut encrypted = Vec::with_capacity(terms.len());

    for raw in terms {
        let plaintext = OrePlaintext::from(siphash(raw.as_bytes()));
        let term = EncryptedTerm::from(plaintext.0.encrypt(&mut *ore)?);

        encrypted.push(VectorTerm {
            term,
            link: *record_id,
        });
    }

    Ok(Some(TermVector {
        terms: encrypted,
        index_id: *index_id,
    }))
}

/// An Indexer that indexes a record based on a single index.
///
/// Built with [`MappingIndexer::from_mapping`]; records are turned into [`TermVector`]s with
/// [`MappingIndexer::encrypt`].
pub struct MappingIndexer {
    /// ORE cipher used to encrypt every term produced by this indexer.
    ore: OREAES128,
    /// Id of the index, copied onto every [`TermVector`] this indexer produces.
    pub(crate) index_id: [u8; 16],
    /// The mapping that decides which parts of a record are indexed and how.
    pub(crate) mapping: Mapping,
}

impl MappingIndexer {
    /// Load a single MappingIndexer from a [`MappingWithMeta`]
    pub fn from_mapping(mapping: MappingWithMeta) -> Result<Self, LoadIndexerError> {
        let seed = hex!("00010203 04050607");

        let ore = OREAES128::init(mapping.prf_key, mapping.prp_key, &seed)
            .map_err(|e| LoadIndexerError::Other(format!("ORE init failed: {:?}", e)))?;

        Ok(Self {
            ore,
            index_id: mapping.index_id,
            mapping: mapping.mapping,
        })
    }

    /// Create a [`TermVector`] from a single term for the current mapping
    pub fn vector_from_term(&self, term: VectorTerm) -> TermVector {
        TermVector {
            terms: vec![term],
            index_id: self.index_id,
        }
    }

    /// Encrypt a [`Record`] using ORE
    ///
    /// If the Record contains no meaningful data to encrypt for this index, this method will
    /// return `None`.
    pub fn encrypt(&mut self, record: &Record) -> Result<Option<TermVector>, IndexerError> {
        let id = record.id;

        Ok(match &self.mapping {
            Mapping::Exact { field } => {
                if let Some(plaintext) = record
                    .index_with_dot_notation(field)
                    .and_then(|x| x.as_plaintext())
                {
                    let term = plaintext.0.encrypt(&mut self.ore)?.into();

                    Some(self.vector_from_term(VectorTerm { term, link: id }))
                } else {
                    None
                }
            }

            Mapping::Range { field } => {
                if let Some(val) = record.index_with_dot_notation(field) {
                    if let Value::String(_) = val {
                        // Range isn't supported for strings (yet) - so just exclude them
                        None
                    } else if let Some(plaintext) = val.as_plaintext() {
                        let term = plaintext.0.encrypt(&mut self.ore)?.into();
                        Some(self.vector_from_term(VectorTerm { term, link: id }))
                    } else {
                        // If the current value doesn't have a plaintext representation (it may be
                        // a map or array) then just exclude it from the index.
                        None
                    }
                } else {
                    None
                }
            }

            Mapping::Match { fields, pipeline } => encrypt_string_terms(
                &mut self.ore,
                &self.index_id,
                &record.id,
                // Match only operates on specified fields and only if they are strings
                pipeline.process(record.extract_string_fields(fields)),
            )?,

            Mapping::DynamicMatch { pipeline } => encrypt_string_terms(
                &mut self.ore,
                &self.index_id,
                &record.id,
                // Dynamic match operates on all string fields
                pipeline.process(record.extract_all_string_fields()),
            )?,

            Mapping::FieldDynamicMatch { pipeline } => encrypt_string_terms(
                &mut self.ore,
                &self.index_id,
                &record.id,
                record
                    .extract_all_string_fields_and_keys()
                    .into_iter()
                    .flat_map(|(k, v)| {
                        pipeline
                            .process(vec![v])
                            .into_iter()
                            // In order to specifiy fields and terms to be searches they must be
                            // encrypted in this "field:value" format. If this isn't done querying will
                            // silently fail.
                            .map(move |t| format!("{}:{}", k, t))
                    })
                    .collect(),
            )?,
        })
    }
}

#[cfg(test)]
mod tests {
    use super::MappingIndexer;
    use crate::{
        indexer::mapping::{Mapping, MappingWithMeta},
        record::Record,
        test_utils::collection,
    };

    /// Attach the fixed keys and index id shared by every test in this module to `mapping`.
    fn with_test_keys(mapping: Mapping) -> MappingWithMeta {
        MappingWithMeta {
            mapping,
            prp_key: [0; 16],
            prf_key: [1; 16],
            index_id: [2; 16],
        }
    }

    /// An exact mapping over the `test` field.
    fn exact_on_test_field() -> Mapping {
        Mapping::Exact {
            field: "test".into(),
        }
    }

    /// A range mapping over the `test` field.
    fn range_on_test_field() -> Mapping {
        Mapping::Range {
            field: "test".into(),
        }
    }

    /// Two indexers loaded from identical mappings must produce equal terms for the same record.
    #[test]
    fn test_compare_exact_matches() {
        let record = Record {
            id: [0; 16],
            fields: collection! {
                "test" => "test-string"
            },
        };

        let mut left_indexer = MappingIndexer::from_mapping(with_test_keys(exact_on_test_field()))
            .expect("Failed to gen left indexer");

        let mut right_indexer =
            MappingIndexer::from_mapping(with_test_keys(exact_on_test_field()))
                .expect("Failed to gen right indexer");

        let left = left_indexer
            .encrypt(&record)
            .expect("Failed to encrypt")
            .unwrap();
        let right = right_indexer
            .encrypt(&record)
            .expect("Failed to encrypt")
            .unwrap();

        assert_eq!(left.terms[0], right.terms[0]);
    }

    /// Records holding different strings must not produce equal terms.
    #[test]
    fn test_compare_not_exact() {
        // NOTE(review): the two records also differ in `id`, so if equality on the compared
        // terms takes `link` into account this assertion could pass even for identical
        // ciphertexts - confirm against the `VectorTerm` comparison impl.
        let first = Record {
            id: [0; 16],
            fields: collection! {
                "test" => "test-string"
            },
        };

        let second = Record {
            id: [1; 16],
            fields: collection! {
                "test" => "test-different-string"
            },
        };

        let mut indexer = MappingIndexer::from_mapping(with_test_keys(exact_on_test_field()))
            .expect("Failed to gen left indexer");

        let left = indexer.encrypt(&first).expect("Failed to encrypt").unwrap();
        let right = indexer
            .encrypt(&second)
            .expect("Failed to encrypt")
            .unwrap();

        assert_ne!(left.terms[0], right.terms[0]);
    }

    /// A smaller number must produce a term that orders before the term of a larger number.
    #[test]
    fn test_compare_range() {
        let first = Record {
            id: [0; 16],
            fields: collection! {
                "test" => 10
            },
        };

        let second = Record {
            id: [1; 16],
            fields: collection! {
                "test" => 20
            },
        };

        let mut indexer = MappingIndexer::from_mapping(with_test_keys(range_on_test_field()))
            .expect("Failed to gen left indexer");

        let left = indexer.encrypt(&first).expect("Failed to encrypt").unwrap();
        let right = indexer
            .encrypt(&second)
            .expect("Failed to encrypt")
            .unwrap();

        assert!(left.terms[0] < right.terms[0]);
    }
}