liquid_ml/kv/mod.rs
1//! A module for distributed [`Key`], [`Value`] stores that hold their data in
2//! memory and are generic for (deserialized) [`Value`]s of type `T`.
3//!
4//! A [`KVStore`] is essentially a very simple in-memory distributed database
5//! (with no persistence) that stores data as a collection of key-value pairs
6//! where a [`Key`] is a unique identifier to a [`Value`]. The [`KVStore`]
7//! utilizes the [`network`] module to communicate between nodes using
8//! [`KVMessage`]s.
9//!
10//! Internally [`KVStore`]s store their data in memory as serialized blobs
11//! (a [`Value`] aka a `Vec<u8>`). The [`KVStore`] caches deserialized
12//! [`Value`]s into their type `T` on a least-recently used basis. A hard limit
13//! for the cache size is set to be `1/3` the amount of total memory on the
14//! machine, though this will be changed to be configurable.
15//!
16//! # Provided [`KVStore`] Functionality
17//! - [`get`]: Retrieve data stored locally on a [`KVStore`] or in its cache
18//! - [`wait_and_get`]: Retrieve data either locally or over the network
19//! - [`put`]: Store a [`Key`], [`Value`] pair either locally on a [`KVStore`]
20//! or send it over the network to store it on another [`KVStore`]
21//! - [`send_blob`]: a lower level interface to facilitate sending any
22//! serialized data. In `liquid_ml`, this is used for sending
23//! [`Rower`](../dataframe/trait.Rower.html)s
24//! (and other use cases) in a
25//! [`DistributedDataFrame`](../dataframe/struct.DistributedDataFrame.html)
26//!
27//!
28//!
29//! [`Key`]: struct.Key.html
30//! [`Value`]: type.Value.html
31//! [`KVStore`]: struct.KVStore.html
32//! [`get`]: struct.KVStore.html#method.get
33//! [`wait_and_get`]: struct.KVStore.html#method.wait_and_get
34//! [`put`]: struct.KVStore.html#method.put
35//! [`send_blob`]: struct.KVStore.html#method.send_blob
36//! [`KVMessage`]: enum.KVMessage.html
37//! [`Data`]: enum.KVMessage.html#variant.Data
38//! [`Put`]: enum.KVMessage.html#variant.Put
39//! [`Blob`]: enum.KVMessage.html#variant.Blob
40use rand::{self, Rng};
41use serde::{Deserialize, Serialize};
42
43mod kv_store;
44pub use crate::kv::kv_store::{KVMessage, KVStore};
45
46/// A `Key` identifies where in a [`KVStore`] a [`Value`] is stored, as well as
47/// which node (and thus which [`KVStore`]) 'owns' that [`Value`].
48///
49/// [`KVStore`]: struct.KVStore.html
50/// [`Value`]: type.Value.html
51#[derive(PartialEq, Eq, Hash, Serialize, Deserialize, Debug, Clone)]
52pub struct Key {
53 /// The name that defines where in a [`KVStore`] the value is stored
54 ///
55 /// [`KVStore`]: struct.KVStore.html
56 pub name: String,
57 /// The id of the node that 'owns' the associated [`Value`]
58 ///
59 /// [`Value`]: type.Value.html
60 pub home: usize,
61}
62
63/// A serialized blob of data, held as raw bytes. Each `Value` is associated
64/// with a [`Key`], which defines where it is stored in a [`KVStore`] and its 'owner'.
65///
66/// [`Key`]: struct.Key.html
67/// [`KVStore`]: struct.KVStore.html
68pub type Value = Vec<u8>;
69
70impl Key {
71 /// Creates a new [`Key`] that is owned by the [`KVStore`] running on the
72 /// node with id equal to `home`. The given `name` defines where in the
73 /// [`KVStore`] the value is stored.
74 ///
75 /// [`Key`]: struct.Key.html
76 /// [`KVStore`]: struct.KVStore.html
77 pub fn new(name: &str, home: usize) -> Self {
78 Key {
79 name: String::from(name),
80 home,
81 }
82 }
83
84 /// Make a key with an automatically generated name
85 pub(crate) fn generate(name: &str, home: usize) -> Self {
86 let mut rng = rand::thread_rng();
87 Key {
88 name: format!("{}-{}-{}", name, home, rng.gen::<i16>()),
89 home,
90 }
91 }
92}