dodo/index/mod.rs
1//! Index of values.
2
3use std::borrow::Borrow;
4use std::collections::{HashMap, HashSet};
5use std::hash::Hash;
6use std::marker::PhantomData;
7use std::str::FromStr;
8use std::vec;
9
10use uuid::Uuid;
11
12pub use error::*;
13
14use crate::serializer::Serializer;
15use crate::storage::Storage;
16
17mod error;
18
19type IndexDocument = HashMap<String, HashSet<Uuid>>;
20
21/// Index of values.
22///
/// Indexes are usually paired with collections to make some queries faster. They allow you to
/// quickly locate an entity id without having to search the whole collection.
25///
26/// # Example
27///
28/// ```
29/// use dodo::prelude::*;
30/// # use serde::{Deserialize, Serialize};
31/// # use uuid::Uuid;
32/// #
33/// # #[derive(Debug, Entity, Serialize, Deserialize, Eq, PartialEq)]
34/// # #[serde(rename_all = "camelCase")]
35/// # struct Person { id: Option<Uuid>, name : String, age: u64 }
36/// #
37/// # impl Person {
38/// # fn new(name : &str) -> Self { Self { id : None, name : name.into(), age : 42 } }
39/// # }
40///
41/// type PersonCollection = Collection<Person, Directory, JsonSerializer>;
42/// type NameIndex = Index<String, Directory, JsonSerializer>;
43///
44/// fn main() -> Result<(), Box<dyn std::error::Error>> {
45/// # let collection_path = tempfile::tempdir()?;
46/// # let index_path = tempfile::tempdir()?;
47/// let mut collection = PersonCollection::new(Directory::new(&collection_path)?);
48/// let mut index = NameIndex::new(Directory::new(&index_path)?);
49///
50/// let mut person1 = Person::new("John Smith");
51/// collection.insert(&mut person1)?;
52/// index.add(person1.id.unwrap(), &person1.name)?;
53/// let mut person2 = Person::new("John Smith");
54/// collection.insert(&mut person2)?;
55/// index.add(person2.id.unwrap(), &person2.name)?;
56/// let mut person3 = Person::new("Mary Smith");
57/// collection.insert(&mut person3)?;
58/// index.add(person3.id.unwrap(), &person3.name)?;
59///
60/// // Ids of all "John Smith"s.
61/// let ids = index.find("John Smith")?;
62///
63/// println!("{:?}", ids);
64///
65/// Ok(())
66/// }
67/// ```
68///
69/// # Storage optimisations
70///
/// Like collections, indexes assume they have complete control of their storage, but if needed,
/// they can share a common storage as long as each index has a different id assigned to it. See
/// the `with_id` function for details.
74///
75/// # Serializer
76///
/// Like collections, indexes need a serializer to serialize the index data.
78///
79/// # Other considerations
80///
81/// You should only index basic types, like numbers or strings. You can still index any value
82/// as long as it implements [Hash](https://doc.rust-lang.org/stable/std/hash/trait.Hash.html),
83/// [ToString](https://doc.rust-lang.org/stable/std/string/trait.ToString.html) and
84/// [FromStr](https://doc.rust-lang.org/stable/std/str/trait.FromStr.html), but it should be
/// considered bad practice.
86#[derive(Debug, Clone)]
87pub struct Index<T, S, R> {
88 id: Uuid,
89 storage: S,
90 _t: PhantomData<T>,
91 _r: PhantomData<R>,
92}
93
94impl<T, S, R> Index<T, S, R>
95 where S: Storage,
96 R: Serializer {
97 /// Create a new index, using provided storage.
98 ///
/// When created like this, the index assumes that it has complete control over the storage
/// and stores the index data in a single file named `00000000-0000-0000-0000-000000000000`. If
/// you want to reuse a storage for multiple indexes, see the `with_id` function.
102 ///
103 /// # Examples
104 ///
105 /// ```
106 /// use dodo::prelude::*;
107 /// # use serde::{Deserialize, Serialize};
108 /// # use uuid::Uuid;
109 /// #
110 /// # #[derive(Debug, Entity, Serialize, Deserialize, Eq, PartialEq)]
111 /// # #[serde(rename_all = "camelCase")]
112 /// # struct Person { id: Option<Uuid>, name : String, age: u64 }
113 /// #
114 /// # impl Person {
115 /// # fn new(name : &str) -> Self { Self { id : None, name : name.into(), age : 42 } }
116 /// # }
117 ///
118 /// type NameIndex = Index<String, Directory, JsonSerializer>;
119 ///
120 /// fn main() -> Result<(), Box<dyn std::error::Error>> {
121 /// # let index_path = tempfile::tempdir()?;
122 /// let mut index = NameIndex::new(Directory::new(&index_path)?);
123 ///
124 /// Ok(())
125 /// }
126 /// ```
127 pub fn new(storage: S) -> Self {
128 Self::with_id(storage, Uuid::nil())
129 }
130
131 /// Create a new index inside provided storage, using assigned id.
132 ///
/// When created like this, the index knows that it does not have complete control over the
/// storage and stores the index data in a single file named after its assigned id.
135 ///
136 /// # Examples
137 ///
138 /// ```
139 /// use dodo::prelude::*;
140 /// # use serde::{Deserialize, Serialize};
141 /// # use uuid::Uuid;
142 /// #
143 /// # #[derive(Debug, Entity, Serialize, Deserialize, Eq, PartialEq)]
144 /// # #[serde(rename_all = "camelCase")]
145 /// # struct Person { id: Option<Uuid>, name : String, age: u64 }
146 /// #
147 /// # impl Person {
148 /// # fn new(name : &str) -> Self { Self { id : None, name : name.into(), age : 42 } }
149 /// # }
150 ///
151 /// type NameIndex = Index<String, Directory, JsonSerializer>;
152 ///
153 /// fn main() -> Result<(), Box<dyn std::error::Error>> {
154 /// # let index_path = tempfile::tempdir()?;
155 /// let id = Uuid::parse_str("78190929-3d84-4735-9e40-80e3cd5530e9").unwrap();
156 /// let mut index = NameIndex::with_id(Directory::new(&index_path)?, id);
157 ///
158 /// Ok(())
159 /// }
160 /// ```
161 pub fn with_id(storage: S, id: Uuid) -> Self {
162 Self {
163 id,
164 storage,
165 _t: PhantomData,
166 _r: PhantomData,
167 }
168 }
169
170 /// Returns all ids corresponding to the given value.
171 ///
172 /// Returns an error if not found.
173 ///
174 /// # Examples
175 ///
176 /// ```
177 /// # use dodo::{prelude::*, storage::Memory};
178 /// # use serde::{Deserialize, Serialize};
179 /// # use uuid::Uuid;
180 /// #
181 /// # #[derive(Debug, Entity, Serialize, Deserialize, Eq, PartialEq)]
182 /// # #[serde(rename_all = "camelCase")]
183 /// # struct Person { id: Option<Uuid>, age: u64 }
184 /// # type NameIndex = Index<String, Memory, JsonSerializer>;
185 /// #
186 /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
187 /// # let index = NameIndex::new(Memory::new());
188 /// #
189 /// let ids = index.find("John Smith");
190 ///
191 /// match ids {
192 /// Ok(ids) => println!("Found!"),
193 /// Err(e) if e.is_not_found() => println!("Not found!"),
194 /// Err(_) => println!("Other error!")
195 /// }
196 /// #
197 /// # Ok(())
198 /// # }
199 /// ```
200 pub fn find<Q>(&self, value: &Q) -> Result<HashSet<Uuid>>
201 where T: Borrow<Q>,
202 Q: Hash + ToString + ?Sized {
203 let mut index_document = self.read()?;
204
205 let value = value.to_string();
206 index_document.remove(&value).ok_or_else(|| IndexError::not_found())
207 }
208
209 /// Provide an iterator through the index entries.
210 ///
211 /// This can be pretty useful to query all index values.
212 ///
213 /// # Examples
214 ///
215 /// ```
216 /// # use dodo::{prelude::*, storage::Memory};
217 /// # use serde::{Deserialize, Serialize};
218 /// # use uuid::Uuid;
219 /// # use std::collections::HashSet;
220 /// #
221 /// # #[derive(Debug, Entity, Serialize, Deserialize, Eq, PartialEq)]
222 /// # #[serde(rename_all = "camelCase")]
223 /// # struct Person { id: Option<Uuid>, age: u64 }
224 /// # type NameIndex = Index<String, Memory, JsonSerializer>;
225 /// #
226 /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
227 /// # let index = NameIndex::new(Memory::new());
228 /// #
229 /// let names: Vec<String> = index.find_all()?.map(|(ids, name) : (HashSet<Uuid>, String)| name).collect();
230 ///
231 /// println!("{:#?}", names);
232 /// #
233 /// # Ok(())
234 /// # }
235 /// ```
236 pub fn find_all(&self) -> Result<IndexIterator<T>>
237 where T: FromStr {
238 IndexIterator::new(self.read()?)
239 }
240
241 /// Add value to the index, assigned to provided id.
242 ///
243 /// The index doesn't keep multiple copies of the same key/value pair (i.e there are no
244 /// duplicates).
245 ///
246 /// # Examples
247 ///
248 /// ```
249 /// # use dodo::{prelude::*, storage::Memory};
250 /// # use serde::{Deserialize, Serialize};
251 /// # use uuid::Uuid;
252 /// # use std::collections::HashSet;
253 /// #
254 /// # #[derive(Debug, Entity, Serialize, Deserialize, Eq, PartialEq)]
255 /// # #[serde(rename_all = "camelCase")]
256 /// # struct Person { id: Option<Uuid>, age: u64 }
257 /// # type NameIndex = Index<String, Memory, JsonSerializer>;
258 /// #
259 /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
260 /// # let mut index = NameIndex::new(Memory::new());
261 /// #
262 /// let id = Uuid::parse_str("78190929-3d84-4735-9e40-80e3cd5530e9").unwrap();
263 /// let value = "John Smith".into();
264 ///
265 /// index.add(id, &value)?;
266 /// #
267 /// # Ok(())
268 /// # }
269 /// ```
270 pub fn add(&mut self, id: Uuid, value: &T) -> Result<()>
271 where T: Hash + ToString {
272 let mut index_document = self.read()?;
273
274 let mut has_changed = false;
275 has_changed = has_changed || index_document.entry(value.to_string()).or_insert_with(|| {
276 has_changed = true;
277 Default::default()
278 }).insert(id);
279
280 if has_changed {
281 self.write(index_document)?;
282 }
283 Ok(())
284 }
285
286 /// Remove id/value pair from index.
287 ///
288 /// This does not fail if the pair doesn't exist in the index. Instead, this returns a
289 /// boolean : if true, the pair was removed, and if false, the pair was not found.
290 ///
291 /// # Examples
292 ///
293 /// ```
294 /// # use dodo::{prelude::*, storage::Memory};
295 /// # use serde::{Deserialize, Serialize};
296 /// # use uuid::Uuid;
297 /// # use std::collections::HashSet;
298 /// #
299 /// # #[derive(Debug, Entity, Serialize, Deserialize, Eq, PartialEq)]
300 /// # #[serde(rename_all = "camelCase")]
301 /// # struct Person { id: Option<Uuid>, age: u64 }
302 /// # type NameIndex = Index<String, Memory, JsonSerializer>;
303 /// #
304 /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
305 /// # let mut index = NameIndex::new(Memory::new());
306 /// #
307 /// let id = Uuid::parse_str("78190929-3d84-4735-9e40-80e3cd5530e9").unwrap();
308 /// let value : String = "John Smith".into();
309 /// index.add(id, &value)?;
310 ///
311 /// assert!(index.remove(id, &value)?);
312 /// #
313 /// # Ok(())
314 /// # }
315 /// ```
316 pub fn remove<Q>(&mut self, id: Uuid, value: &Q) -> Result<bool>
317 where T: Borrow<Q>,
318 Q: Hash + ToString + ?Sized {
319 let mut index_document = self.read()?;
320
321 let value = value.to_string();
322 let mut has_changed = false;
323 if let Some(ids) = index_document.get_mut(&value) {
324 has_changed = ids.remove(&id);
325 if ids.is_empty() { index_document.remove(&value); }
326 }
327
328 if has_changed { self.write(index_document)?; }
329 Ok(has_changed)
330 }
331
332 /// Remove id from index.
333 ///
334 /// This has some serious performance implications as it must traverse the entire index and
335 /// should only be used when unavoidable.
336 ///
337 /// This does not fail if the id doesn't exist in the index. Instead, this returns a boolean :
338 /// if true, the id was removed, and if false, the id was not found.
339 ///
340 /// # Examples
341 ///
342 /// ```
343 /// # use dodo::{prelude::*, storage::Memory};
344 /// # use serde::{Deserialize, Serialize};
345 /// # use uuid::Uuid;
346 /// # use std::collections::HashSet;
347 /// #
348 /// # #[derive(Debug, Entity, Serialize, Deserialize, Eq, PartialEq)]
349 /// # #[serde(rename_all = "camelCase")]
350 /// # struct Person { id: Option<Uuid>, age: u64 }
351 /// # type NameIndex = Index<String, Memory, JsonSerializer>;
352 /// #
353 /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
354 /// # let mut index = NameIndex::new(Memory::new());
355 /// #
356 /// let id = Uuid::parse_str("78190929-3d84-4735-9e40-80e3cd5530e9").unwrap();
357 /// let value : String = "John Smith".into();
358 /// index.add(id, &value)?;
359 ///
360 /// assert!(index.remove_id(id)?);
361 /// #
362 /// # Ok(())
363 /// # }
364 /// ```
365 pub fn remove_id(&mut self, id: Uuid) -> Result<bool> {
366 let mut index_document = self.read()?;
367
368 let mut has_changed = false;
369 index_document.retain(|_, it| {
370 has_changed = has_changed || it.remove(&id);
371 it.len() > 0
372 });
373
374 if has_changed { self.write(index_document)?; }
375 Ok(has_changed)
376 }
377
378 /// Remove value from index and all related ids.
379 ///
/// Returns the ids assigned to the removed value. Note that these ids can still be in the
/// index, but assigned to other values.
382 ///
383 /// This does not fail if the value doesn't exist in the index. In that case, this returns an
384 /// empty set of ids.
385 ///
386 /// # Examples
387 ///
388 /// ```
389 /// # use dodo::{prelude::*, storage::Memory};
390 /// # use serde::{Deserialize, Serialize};
391 /// # use uuid::Uuid;
392 /// # use std::collections::HashSet;
393 /// #
394 /// # #[derive(Debug, Entity, Serialize, Deserialize, Eq, PartialEq)]
395 /// # #[serde(rename_all = "camelCase")]
396 /// # struct Person { id: Option<Uuid>, age: u64 }
397 /// # type NameIndex = Index<String, Memory, JsonSerializer>;
398 /// #
399 /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
400 /// # let mut index = NameIndex::new(Memory::new());
401 /// #
402 /// let id = Uuid::parse_str("78190929-3d84-4735-9e40-80e3cd5530e9").unwrap();
403 /// let value : String = "John Smith".into();
404 /// index.add(id, &value)?;
405 ///
406 /// let ids = index.remove_value(&value)?;
407 /// assert!(ids.contains(&id));
408 /// #
409 /// # Ok(())
410 /// # }
411 /// ```
412 pub fn remove_value<Q>(&mut self, value: &Q) -> Result<HashSet<Uuid>>
413 where T: Borrow<Q>,
414 Q: Hash + ToString + ?Sized {
415 let mut index_document = self.read()?;
416
417 match index_document.remove(&value.to_string()) {
418 Some(ids) => {
419 self.write(index_document)?;
420 Ok(ids)
421 }
422 None => {
423 Ok(Default::default())
424 }
425 }
426 }
427
428 /// Remove every value in this index.
429 ///
/// Everything in this index will be deleted. Use at your own risk.
431 ///
432 /// # Examples
433 ///
434 /// ```
435 /// # use dodo::{prelude::*, storage::Memory};
436 /// # use serde::{Deserialize, Serialize};
437 /// # use uuid::Uuid;
438 /// # use std::collections::HashSet;
439 /// #
440 /// # #[derive(Debug, Entity, Serialize, Deserialize, Eq, PartialEq)]
441 /// # #[serde(rename_all = "camelCase")]
442 /// # struct Person { id: Option<Uuid>, age: u64 }
443 /// # type NameIndex = Index<String, Memory, JsonSerializer>;
444 /// #
445 /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
446 /// # let mut index = NameIndex::new(Memory::new());
447 /// #
448 /// # let id = Uuid::parse_str("78190929-3d84-4735-9e40-80e3cd5530e9").unwrap();
449 /// # let value = "John Smith".into();
450 /// #
451 /// index.add(id, &value)?;
452 ///
453 /// index.clear()?;
454 /// #
455 /// # Ok(())
456 /// # }
457 /// ```
458 pub fn clear(&mut self) -> Result<()> {
459 self.write(Default::default())
460 }
461
462 fn read(&self) -> Result<IndexDocument> {
463 match self.storage.read(self.id) {
464 Ok(reader) => Ok(R::deserialize(reader)?),
465 Err(e) if e.is_not_found() => Ok(Default::default()),
466 Err(e) => Err(e.into())
467 }
468 }
469
470 fn write(&mut self, index_document: IndexDocument) -> Result<()> {
471 Ok(R::serialize(self.storage.write(self.id)?, &index_document)?)
472 }
473}
474
/// Index iterator, yielding keys/value pairs.
476///
477/// This is an iterator of `(HashSet<Uuid>, T)`, because there can be multiple ids for the same
478/// value in the index.
479///
480/// # Example
481///
482/// ```
483/// # use dodo::{prelude::*, storage::Memory};
484/// # use serde::{Deserialize, Serialize};
485/// # use uuid::Uuid;
486/// # use std::collections::HashSet;
487/// #
488/// # #[derive(Debug, Entity, Serialize, Deserialize, Eq, PartialEq)]
489/// # #[serde(rename_all = "camelCase")]
490/// # struct Person { id: Option<Uuid>, age: u64 }
491/// # type NameIndex = Index<String, Memory, JsonSerializer>;
492/// #
493/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
494/// # let mut index = NameIndex::new(Memory::new());
495/// #
496/// let pairs : Vec<(HashSet<Uuid>, String)> = index.find_all()?
497/// .collect();
498/// #
499/// # Ok(())
500/// # }
501/// ```
502#[derive(Debug)]
503pub struct IndexIterator<T> {
504 iterator: vec::IntoIter<(HashSet<Uuid>, T)>,
505}
506
507impl<T> IndexIterator<T>
508 where T: FromStr {
509 fn new(index_document: IndexDocument) -> Result<Self> {
510 let mut items = Vec::with_capacity(index_document.len());
511
512 for (value, ids) in index_document.into_iter() {
513 match T::from_str(&value) {
514 Ok(value) => items.push((ids, value)),
515 Err(_) => return Err(IndexError::syntax(format!("index value could not be parsed : {}", &value)))
516 }
517 }
518
519 Ok(Self {
520 iterator: items.into_iter()
521 })
522 }
523}
524
525impl<T> Iterator for IndexIterator<T> {
526 type Item = (HashSet<Uuid>, T);
527
528 fn next(&mut self) -> Option<Self::Item> {
529 self.iterator.next()
530 }
531}