dodo/index/
mod.rs

1//! Index of values.
2
3use std::borrow::Borrow;
4use std::collections::{HashMap, HashSet};
5use std::hash::Hash;
6use std::marker::PhantomData;
7use std::str::FromStr;
8use std::vec;
9
10use uuid::Uuid;
11
12pub use error::*;
13
14use crate::serializer::Serializer;
15use crate::storage::Storage;
16
17mod error;
18
19type IndexDocument = HashMap<String, HashSet<Uuid>>;
20
21/// Index of values.
22///
23/// Indexes are usually paired with collections to make some queries faster. They allow to quickly
24/// locate an entity id without having to search the whole collection.
25///
26/// # Example
27///
28/// ```
29/// use dodo::prelude::*;
30/// # use serde::{Deserialize, Serialize};
31/// # use uuid::Uuid;
32/// #
33/// # #[derive(Debug, Entity, Serialize, Deserialize, Eq, PartialEq)]
34/// # #[serde(rename_all = "camelCase")]
35/// # struct Person { id: Option<Uuid>, name : String, age: u64 }
36/// #
37/// # impl Person {
38/// #    fn new(name : &str) -> Self { Self { id : None, name : name.into(), age : 42 } }
39/// # }
40///
41/// type PersonCollection = Collection<Person, Directory, JsonSerializer>;
42/// type NameIndex = Index<String, Directory, JsonSerializer>;
43///
44/// fn main() -> Result<(), Box<dyn std::error::Error>> {
45/// #   let collection_path  = tempfile::tempdir()?;
46/// #   let index_path  = tempfile::tempdir()?;
47///     let mut collection = PersonCollection::new(Directory::new(&collection_path)?);
48///     let mut index = NameIndex::new(Directory::new(&index_path)?);
49///
50///     let mut person1 = Person::new("John Smith");
51///     collection.insert(&mut person1)?;
52///     index.add(person1.id.unwrap(), &person1.name)?;
53///     let mut person2 = Person::new("John Smith");
54///     collection.insert(&mut person2)?;
55///     index.add(person2.id.unwrap(), &person2.name)?;
56///     let mut person3 = Person::new("Mary Smith");
57///     collection.insert(&mut person3)?;
58///     index.add(person3.id.unwrap(), &person3.name)?;
59///
60///     // Ids of all "John Smith"s.
61///     let ids = index.find("John Smith")?;
62///
63///     println!("{:?}", ids);
64///
65///     Ok(())
66/// }
67/// ```
68///
69/// # Storage optimisations
70///
71/// Like collections, indexes assume they have complete control of their storage, but if needed,
72/// they can share a common storage as long as each index has a different id assigned to it. See
73/// the `with_id` function for details.
74///
75/// # Serializer
76///
77/// Like collections, indexes need a serializer to serialize the index data.
78///
79/// # Other considerations
80///
81/// You should only index basic types, like numbers or strings. You can still index any value
82/// as long as it implements [Hash](https://doc.rust-lang.org/stable/std/hash/trait.Hash.html),
83/// [ToString](https://doc.rust-lang.org/stable/std/string/trait.ToString.html) and
84/// [FromStr](https://doc.rust-lang.org/stable/std/str/trait.FromStr.html), but it should be
85/// considered bad practice.
86#[derive(Debug, Clone)]
87pub struct Index<T, S, R> {
    /// Identifier under which the index document is read from and written to the storage.
88    id: Uuid,
    /// Backing storage holding the serialized index document.
89    storage: S,
    /// Marker for the indexed value type; no `T` is ever stored in the struct itself.
90    _t: PhantomData<T>,
    /// Marker for the serializer type, which is only used through its associated functions.
91    _r: PhantomData<R>,
92}
93
94impl<T, S, R> Index<T, S, R>
95    where S: Storage,
96          R: Serializer {
97    /// Create a new index, using provided storage.
98    ///
99    /// When created like this, the index assumes that it has complete control over the storage
100    /// and stores the index data a single file named `00000000-0000-0000-0000-000000000000`. If
101    /// you want to reuse a storage for multiple indexes, see the `with_id` function.
102    ///
103    /// # Examples
104    ///
105    /// ```
106    /// use dodo::prelude::*;
107    /// # use serde::{Deserialize, Serialize};
108    /// # use uuid::Uuid;
109    /// #
110    /// # #[derive(Debug, Entity, Serialize, Deserialize, Eq, PartialEq)]
111    /// # #[serde(rename_all = "camelCase")]
112    /// # struct Person { id: Option<Uuid>, name : String, age: u64 }
113    /// #
114    /// # impl Person {
115    /// #    fn new(name : &str) -> Self { Self { id : None, name : name.into(), age : 42 } }
116    /// # }
117    ///
118    /// type NameIndex = Index<String, Directory, JsonSerializer>;
119    ///
120    /// fn main() -> Result<(), Box<dyn std::error::Error>> {
121    /// #   let index_path  = tempfile::tempdir()?;
122    ///     let mut index = NameIndex::new(Directory::new(&index_path)?);
123    ///
124    ///     Ok(())
125    /// }
126    /// ```
127    pub fn new(storage: S) -> Self {
128        Self::with_id(storage, Uuid::nil())
129    }
130
131    /// Create a new index inside provided storage, using assigned id.
132    ///
133    /// When created like this, the index knows that it does not have complete control over the
134    /// storage and stores the index data a single file named after his assigned id.
135    ///
136    /// # Examples
137    ///
138    /// ```
139    /// use dodo::prelude::*;
140    /// # use serde::{Deserialize, Serialize};
141    /// # use uuid::Uuid;
142    /// #
143    /// # #[derive(Debug, Entity, Serialize, Deserialize, Eq, PartialEq)]
144    /// # #[serde(rename_all = "camelCase")]
145    /// # struct Person { id: Option<Uuid>, name : String, age: u64 }
146    /// #
147    /// # impl Person {
148    /// #    fn new(name : &str) -> Self { Self { id : None, name : name.into(), age : 42 } }
149    /// # }
150    ///
151    /// type NameIndex = Index<String, Directory, JsonSerializer>;
152    ///
153    /// fn main() -> Result<(), Box<dyn std::error::Error>> {
154    /// #   let index_path  = tempfile::tempdir()?;
155    ///     let id = Uuid::parse_str("78190929-3d84-4735-9e40-80e3cd5530e9").unwrap();
156    ///     let mut index = NameIndex::with_id(Directory::new(&index_path)?, id);
157    ///
158    ///     Ok(())
159    /// }
160    /// ```
161    pub fn with_id(storage: S, id: Uuid) -> Self {
162        Self {
163            id,
164            storage,
165            _t: PhantomData,
166            _r: PhantomData,
167        }
168    }
169
170    /// Returns all ids corresponding to the given value.
171    ///
172    /// Returns an error if not found.
173    ///
174    /// # Examples
175    ///
176    /// ```
177    /// # use dodo::{prelude::*, storage::Memory};
178    /// # use serde::{Deserialize, Serialize};
179    /// # use uuid::Uuid;
180    /// #
181    /// # #[derive(Debug, Entity, Serialize, Deserialize, Eq, PartialEq)]
182    /// # #[serde(rename_all = "camelCase")]
183    /// # struct Person { id: Option<Uuid>, age: u64 }
184    /// # type NameIndex = Index<String, Memory, JsonSerializer>;
185    /// #
186    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
187    /// #     let index = NameIndex::new(Memory::new());
188    /// #
189    /// let ids = index.find("John Smith");
190    ///
191    /// match ids {
192    ///     Ok(ids) => println!("Found!"),
193    ///     Err(e) if e.is_not_found() => println!("Not found!"),
194    ///     Err(_) => println!("Other error!")
195    /// }
196    /// #
197    /// #     Ok(())
198    /// # }
199    /// ```
200    pub fn find<Q>(&self, value: &Q) -> Result<HashSet<Uuid>>
201        where T: Borrow<Q>,
202              Q: Hash + ToString + ?Sized {
203        let mut index_document = self.read()?;
204
205        let value = value.to_string();
206        index_document.remove(&value).ok_or_else(|| IndexError::not_found())
207    }
208
209    /// Provide an iterator through the index entries.
210    ///
211    /// This can be pretty useful to query all index values.
212    ///
213    /// # Examples
214    ///
215    /// ```
216    /// # use dodo::{prelude::*, storage::Memory};
217    /// # use serde::{Deserialize, Serialize};
218    /// # use uuid::Uuid;
219    /// # use std::collections::HashSet;
220    /// #
221    /// # #[derive(Debug, Entity, Serialize, Deserialize, Eq, PartialEq)]
222    /// # #[serde(rename_all = "camelCase")]
223    /// # struct Person { id: Option<Uuid>, age: u64 }
224    /// # type NameIndex = Index<String, Memory, JsonSerializer>;
225    /// #
226    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
227    /// #     let index = NameIndex::new(Memory::new());
228    /// #
229    /// let names: Vec<String> = index.find_all()?.map(|(ids, name) : (HashSet<Uuid>, String)| name).collect();
230    ///
231    /// println!("{:#?}", names);
232    /// #
233    /// #     Ok(())
234    /// # }
235    /// ```
236    pub fn find_all(&self) -> Result<IndexIterator<T>>
237        where T: FromStr {
238        IndexIterator::new(self.read()?)
239    }
240
241    /// Add value to the index, assigned to provided id.
242    ///
243    /// The index doesn't keep multiple copies of the same key/value pair (i.e there are no
244    /// duplicates).
245    ///
246    /// # Examples
247    ///
248    /// ```
249    /// # use dodo::{prelude::*, storage::Memory};
250    /// # use serde::{Deserialize, Serialize};
251    /// # use uuid::Uuid;
252    /// # use std::collections::HashSet;
253    /// #
254    /// # #[derive(Debug, Entity, Serialize, Deserialize, Eq, PartialEq)]
255    /// # #[serde(rename_all = "camelCase")]
256    /// # struct Person { id: Option<Uuid>, age: u64 }
257    /// # type NameIndex = Index<String, Memory, JsonSerializer>;
258    /// #
259    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
260    /// #     let mut index = NameIndex::new(Memory::new());
261    /// #
262    /// let id = Uuid::parse_str("78190929-3d84-4735-9e40-80e3cd5530e9").unwrap();
263    /// let value = "John Smith".into();
264    ///
265    /// index.add(id, &value)?;
266    /// #
267    /// #     Ok(())
268    /// # }
269    /// ```
270    pub fn add(&mut self, id: Uuid, value: &T) -> Result<()>
271        where T: Hash + ToString {
272        let mut index_document = self.read()?;
273
274        let mut has_changed = false;
275        has_changed = has_changed || index_document.entry(value.to_string()).or_insert_with(|| {
276            has_changed = true;
277            Default::default()
278        }).insert(id);
279
280        if has_changed {
281            self.write(index_document)?;
282        }
283        Ok(())
284    }
285
286    /// Remove id/value pair from index.
287    ///
288    /// This does not fail if the pair doesn't exist in the index. Instead, this returns a
289    /// boolean : if true, the pair was removed, and if false, the pair was not found.
290    ///
291    /// # Examples
292    ///
293    /// ```
294    /// # use dodo::{prelude::*, storage::Memory};
295    /// # use serde::{Deserialize, Serialize};
296    /// # use uuid::Uuid;
297    /// # use std::collections::HashSet;
298    /// #
299    /// # #[derive(Debug, Entity, Serialize, Deserialize, Eq, PartialEq)]
300    /// # #[serde(rename_all = "camelCase")]
301    /// # struct Person { id: Option<Uuid>, age: u64 }
302    /// # type NameIndex = Index<String, Memory, JsonSerializer>;
303    /// #
304    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
305    /// #     let mut index = NameIndex::new(Memory::new());
306    /// #
307    /// let id = Uuid::parse_str("78190929-3d84-4735-9e40-80e3cd5530e9").unwrap();
308    /// let value : String = "John Smith".into();
309    /// index.add(id, &value)?;
310    ///
311    /// assert!(index.remove(id, &value)?);
312    /// #
313    /// #     Ok(())
314    /// # }
315    /// ```
316    pub fn remove<Q>(&mut self, id: Uuid, value: &Q) -> Result<bool>
317        where T: Borrow<Q>,
318              Q: Hash + ToString + ?Sized {
319        let mut index_document = self.read()?;
320
321        let value = value.to_string();
322        let mut has_changed = false;
323        if let Some(ids) = index_document.get_mut(&value) {
324            has_changed = ids.remove(&id);
325            if ids.is_empty() { index_document.remove(&value); }
326        }
327
328        if has_changed { self.write(index_document)?; }
329        Ok(has_changed)
330    }
331
332    /// Remove id from index.
333    ///
334    /// This has some serious performance implications as it must traverse the entire index and
335    /// should only be used when unavoidable.
336    ///
337    /// This does not fail if the id doesn't exist in the index. Instead, this returns a boolean :
338    /// if true, the id was removed, and if false, the id was not found.
339    ///
340    /// # Examples
341    ///
342    /// ```
343    /// # use dodo::{prelude::*, storage::Memory};
344    /// # use serde::{Deserialize, Serialize};
345    /// # use uuid::Uuid;
346    /// # use std::collections::HashSet;
347    /// #
348    /// # #[derive(Debug, Entity, Serialize, Deserialize, Eq, PartialEq)]
349    /// # #[serde(rename_all = "camelCase")]
350    /// # struct Person { id: Option<Uuid>, age: u64 }
351    /// # type NameIndex = Index<String, Memory, JsonSerializer>;
352    /// #
353    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
354    /// #     let mut index = NameIndex::new(Memory::new());
355    /// #
356    /// let id = Uuid::parse_str("78190929-3d84-4735-9e40-80e3cd5530e9").unwrap();
357    /// let value : String = "John Smith".into();
358    /// index.add(id, &value)?;
359    ///
360    /// assert!(index.remove_id(id)?);
361    /// #
362    /// #     Ok(())
363    /// # }
364    /// ```
365    pub fn remove_id(&mut self, id: Uuid) -> Result<bool> {
366        let mut index_document = self.read()?;
367
368        let mut has_changed = false;
369        index_document.retain(|_, it| {
370            has_changed = has_changed || it.remove(&id);
371            it.len() > 0
372        });
373
374        if has_changed { self.write(index_document)?; }
375        Ok(has_changed)
376    }
377
378    /// Remove value from index and all related ids.
379    ///
380    /// Returns the ids assigned to the removed value. Note that theses ids can still be in the
381    /// index, but assigned to others values.
382    ///
383    /// This does not fail if the value doesn't exist in the index. In that case, this returns an
384    /// empty set of ids.
385    ///
386    /// # Examples
387    ///
388    /// ```
389    /// # use dodo::{prelude::*, storage::Memory};
390    /// # use serde::{Deserialize, Serialize};
391    /// # use uuid::Uuid;
392    /// # use std::collections::HashSet;
393    /// #
394    /// # #[derive(Debug, Entity, Serialize, Deserialize, Eq, PartialEq)]
395    /// # #[serde(rename_all = "camelCase")]
396    /// # struct Person { id: Option<Uuid>, age: u64 }
397    /// # type NameIndex = Index<String, Memory, JsonSerializer>;
398    /// #
399    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
400    /// #     let mut index = NameIndex::new(Memory::new());
401    /// #
402    /// let id = Uuid::parse_str("78190929-3d84-4735-9e40-80e3cd5530e9").unwrap();
403    /// let value : String = "John Smith".into();
404    /// index.add(id, &value)?;
405    ///
406    /// let ids = index.remove_value(&value)?;
407    /// assert!(ids.contains(&id));
408    /// #
409    /// #     Ok(())
410    /// # }
411    /// ```
412    pub fn remove_value<Q>(&mut self, value: &Q) -> Result<HashSet<Uuid>>
413        where T: Borrow<Q>,
414              Q: Hash + ToString + ?Sized {
415        let mut index_document = self.read()?;
416
417        match index_document.remove(&value.to_string()) {
418            Some(ids) => {
419                self.write(index_document)?;
420                Ok(ids)
421            }
422            None => {
423                Ok(Default::default())
424            }
425        }
426    }
427
428    /// Remove every value in this index.
429    ///
430    /// Everything in this index will be deleted. Use at your own risks.
431    ///
432    /// # Examples
433    ///
434    /// ```
435    /// # use dodo::{prelude::*, storage::Memory};
436    /// # use serde::{Deserialize, Serialize};
437    /// # use uuid::Uuid;
438    /// # use std::collections::HashSet;
439    /// #
440    /// # #[derive(Debug, Entity, Serialize, Deserialize, Eq, PartialEq)]
441    /// # #[serde(rename_all = "camelCase")]
442    /// # struct Person { id: Option<Uuid>, age: u64 }
443    /// # type NameIndex = Index<String, Memory, JsonSerializer>;
444    /// #
445    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
446    /// #     let mut index = NameIndex::new(Memory::new());
447    /// #
448    /// #     let id = Uuid::parse_str("78190929-3d84-4735-9e40-80e3cd5530e9").unwrap();
449    /// #     let value = "John Smith".into();
450    /// #
451    /// index.add(id, &value)?;
452    ///
453    /// index.clear()?;
454    /// #
455    /// #     Ok(())
456    /// # }
457    /// ```
458    pub fn clear(&mut self) -> Result<()> {
459        self.write(Default::default())
460    }
461
462    fn read(&self) -> Result<IndexDocument> {
463        match self.storage.read(self.id) {
464            Ok(reader) => Ok(R::deserialize(reader)?),
465            Err(e) if e.is_not_found() => Ok(Default::default()),
466            Err(e) => Err(e.into())
467        }
468    }
469
470    fn write(&mut self, index_document: IndexDocument) -> Result<()> {
471        Ok(R::serialize(self.storage.write(self.id)?, &index_document)?)
472    }
473}
474
475/// Index iterator, yielding ids/value pairs.
476///
477/// This is an iterator of `(HashSet<Uuid>, T)`, because there can be multiple ids for the same
478/// value in the index.
479///
480/// # Example
481///
482/// ```
483/// # use dodo::{prelude::*, storage::Memory};
484/// # use serde::{Deserialize, Serialize};
485/// # use uuid::Uuid;
486/// # use std::collections::HashSet;
487/// #
488/// # #[derive(Debug, Entity, Serialize, Deserialize, Eq, PartialEq)]
489/// # #[serde(rename_all = "camelCase")]
490/// # struct Person { id: Option<Uuid>, age: u64 }
491/// # type NameIndex = Index<String, Memory, JsonSerializer>;
492/// #
493/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
494/// #     let mut index = NameIndex::new(Memory::new());
495/// #
496/// let pairs : Vec<(HashSet<Uuid>, String)> = index.find_all()?
497///                                                 .collect();
498/// #
499/// #     Ok(())
500/// # }
501/// ```
502#[derive(Debug)]
503pub struct IndexIterator<T> {
    /// Already-parsed `(ids, value)` entries, consumed one by one by `Iterator::next`.
504    iterator: vec::IntoIter<(HashSet<Uuid>, T)>,
505}
506
507impl<T> IndexIterator<T>
508    where T: FromStr {
509    fn new(index_document: IndexDocument) -> Result<Self> {
510        let mut items = Vec::with_capacity(index_document.len());
511
512        for (value, ids) in index_document.into_iter() {
513            match T::from_str(&value) {
514                Ok(value) => items.push((ids, value)),
515                Err(_) => return Err(IndexError::syntax(format!("index value could not be parsed : {}", &value)))
516            }
517        }
518
519        Ok(Self {
520            iterator: items.into_iter()
521        })
522    }
523}
524
525impl<T> Iterator for IndexIterator<T> {
526    type Item = (HashSet<Uuid>, T);
527
528    fn next(&mut self) -> Option<Self::Item> {
529        self.iterator.next()
530    }
531}