simple_triplestore/
sled.rs

1use std::{
2    collections::{BTreeMap, HashMap},
3    hash::{Hash, Hasher},
4};
5
6use crate::{
7    prelude::*,
8    traits::{IdType, Property},
9    IdGenerator,
10};
11use serde::{de::DeserializeOwned, Serialize};
12
13mod extend;
14mod insert;
15mod iter;
16mod merge;
17mod query;
18mod remove;
19
20#[derive(Debug)]
21pub enum SledTripleStoreError {
22    SledError(sled::Error),
23    SerializationError(bincode::Error),
24    KeySizeError,
25    MissingPropertyData,
26}
27
28impl From<sled::Error> for SledTripleStoreError {
29    fn from(e: sled::Error) -> Self {
30        SledTripleStoreError::SledError(e)
31    }
32}
33
34impl From<bincode::Error> for SledTripleStoreError {
35    fn from(e: bincode::Error) -> Self {
36        SledTripleStoreError::SerializationError(e)
37    }
38}
39
40/// A triplestore which is backed by [sled](https://sled.rs).
41///
42/// # Example
43/// ```
44/// # use ulid::Ulid;
45/// # use simple_triplestore::{prelude::*, SledTripleStore, PropsTriple, Triple, UlidIdGenerator, EdgeOrder};
46/// let temp_dir = tempdir::TempDir::new("sled").unwrap();
47/// let sled_db = sled::open(temp_dir.path()).unwrap();
48///
49/// let mut db = SledTripleStore::new(&sled_db, UlidIdGenerator::new())?;
50///
51/// // Get some identifiers. These will probably come from an index such as `Readable Name -> Ulid`
52/// let node_1 = Ulid(123);
53/// let node_2 = Ulid(456);
54/// let node_3 = Ulid(789);
55/// let edge = Ulid(999);
56///
57/// // We can insert nodes and edges with user-defined property types.
58/// // For a given TripleStore we can have one type for Nodes and one for Edges.
59/// db.insert_node(node_1, "foo".to_string())?;
60/// db.insert_node(node_2, "bar".to_string())?;
61/// db.insert_node(node_3, "baz".to_string())?;
62/// db.insert_edge(Triple{sub: node_1, pred: edge, obj: node_2}, Vec::from([1,2,3]))?;
63/// db.insert_edge(Triple{sub: node_1, pred: edge, obj: node_3}, Vec::from([4,5,6]))?;
64///
65/// // Three vertices with correct properties.
66/// assert_eq!(
67///   db.iter_vertices()
68///     .map(|r| r.expect("ok"))
69///     .collect::<Vec<_>>(),  
70///   [
71///     (node_1, "foo".to_string()),
72///     (node_2, "bar".to_string()),
73///     (node_3, "baz".to_string())
74///   ]
75/// );
76///
77/// // Two edges with the correct properties.
78/// assert_eq!(
79///   db.iter_edges_with_props(EdgeOrder::default())
80///     .map(|r| r.expect("ok"))
81///     .collect::<Vec<_>>(),
82///   [
83///     PropsTriple{
84///       sub: (node_1, "foo".to_string()),
85///       pred: (edge, Vec::from([1,2,3])),
86///       obj: (node_2, "bar".to_string())},
87///     PropsTriple{
88///       sub: (node_1, "foo".to_string()),
89///       pred: (edge, Vec::from([4,5,6])),
90///       obj: (node_3, "baz".to_string())}
91///   ]
92/// );
93/// # Ok::<(), simple_triplestore::sled::SledTripleStoreError>(())
94/// ```
95///
96/// We can do arbitrary queries, e.g.:
97/// ```
98/// # use ulid::Ulid;
99/// # use simple_triplestore::{prelude::*, SledTripleStore, PropsTriple, Triple, UlidIdGenerator, EdgeOrder, QueryError};
100/// # let temp_dir = tempdir::TempDir::new("sled").unwrap();
101/// # let sled_db = sled::open(temp_dir.path()).unwrap();
102/// # let mut db = SledTripleStore::new(&sled_db, UlidIdGenerator::new()).unwrap();
103/// # let node_1 = Ulid(123);
104/// # let node_2 = Ulid(456);
105/// # let node_3 = Ulid(789);
106/// # let edge = Ulid(999);
107/// # db.insert_node(node_1, "foo".to_string()).unwrap();
108/// # db.insert_node(node_2, "bar".to_string()).unwrap();
109/// # db.insert_node(node_3, "baz".to_string()).unwrap();
110/// # db.insert_edge(Triple{sub: node_1, pred: edge, obj: node_2}, Vec::from([1,2,3])).unwrap();
111/// # db.insert_edge(Triple{sub: node_1, pred: edge, obj: node_3}, Vec::from([4,5,6])).unwrap();
/// // Edges where node_3 is the object.
113/// assert_eq!(
114///   db.run(query!{ ? -?-> [node_3] })?
115///     .iter_edges(EdgeOrder::default())
116///     .map(|r| r.expect("ok"))
117///     .collect::<Vec<_>>(),
118///   [
119///     (Triple{sub: node_1, pred: edge, obj: node_3}, Vec::from([4,5,6])),
120///   ]
121/// );
122///
123/// // Edges with `edge` as the predicate.
124/// assert_eq!(
125///   db.run(query!{ ? -[edge]-> ? })?
126///     .iter_edges(EdgeOrder::default())
127///     .map(|r| r.expect("ok"))
128///     .collect::<Vec<_>>(),
129///   [
130///     (Triple{sub: node_1, pred: edge, obj: node_2}, Vec::from([1,2,3])),
131///     (Triple{sub: node_1, pred: edge, obj: node_3}, Vec::from([4,5,6])),
132///   ]
133/// );
134///
135/// # Ok::<(), QueryError<simple_triplestore::sled::SledTripleStoreError, ()>>(())
136/// ```
137pub struct SledTripleStore<
138    Id: IdType,
139    NodeProps: Property + Serialize + DeserializeOwned,
140    EdgeProps: Serialize + DeserializeOwned,
141> {
142    _phantom: std::marker::PhantomData<(Id, NodeProps, EdgeProps)>,
143    node_props: sled::Tree,
144    edge_props: sled::Tree,
145    spo_data: sled::Tree,
146    pos_data: sled::Tree,
147    osp_data: sled::Tree,
148    id_generator: Box<dyn IdGenerator<Id>>,
149}
150
151impl<
152        Id: IdType,
153        NodeProps: Property + Serialize + DeserializeOwned,
154        EdgeProps: Property + Serialize + DeserializeOwned,
155    > SledTripleStore<Id, NodeProps, EdgeProps>
156{
157    pub fn new(
158        db: &sled::Db,
159        id_generator: impl IdGenerator<Id> + 'static,
160    ) -> Result<Self, SledTripleStoreError> {
161        let node_data = db
162            .open_tree(b"node_data")
163            .map_err(|e| SledTripleStoreError::SledError(e))?;
164        let edge_data = db.open_tree(b"edge_data")?;
165        let spo_data = db.open_tree(b"spo_data")?;
166        let pos_data = db.open_tree(b"pos_data")?;
167        let osp_data = db.open_tree(b"osp_data")?;
168
169        Ok(Self {
170            node_props: node_data,
171            edge_props: edge_data,
172            spo_data,
173            pos_data,
174            osp_data,
175            id_generator: Box::new(id_generator),
176            _phantom: std::marker::PhantomData,
177        })
178    }
179}
180
181impl<
182        Id: IdType,
183        NodeProps: Property + Serialize + DeserializeOwned,
184        EdgeProps: Property + Serialize + DeserializeOwned,
185    > TripleStoreError for SledTripleStore<Id, NodeProps, EdgeProps>
186{
187    type Error = SledTripleStoreError;
188}
189
190impl<
191        Id: IdType,
192        NodeProps: Property + Serialize + DeserializeOwned,
193        EdgeProps: Property + Serialize + DeserializeOwned,
194    > TripleStore<Id, NodeProps, EdgeProps> for SledTripleStore<Id, NodeProps, EdgeProps>
195{
196}
197
198impl<
199        Id: IdType,
200        NodeProps: Property + Serialize + DeserializeOwned,
201        EdgeProps: Property + Serialize + DeserializeOwned,
202    > std::fmt::Debug for SledTripleStore<Id, NodeProps, EdgeProps>
203{
204    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
205        f.write_str("SledTripleStore:\n")?;
206        f.write_str(" Node Properties:\n")?;
207        for r in self.node_props.iter() {
208            let (id, node_props) = r.map_err(|_| std::fmt::Error)?;
209            f.write_fmt(format_args!(
210                "  {} -> {:?}\n",
211                Id::try_from_be_bytes(&id).ok_or(std::fmt::Error)?,
212                bincode::deserialize(&node_props).map_err(|_| std::fmt::Error)?
213            ))?;
214        }
215
216        // When printing edge properties, we display the edge hash instead of the Ulid because it
217        // will be stable across graphs whereas the Ulid is not stable.
218        //
219        // Any of the edge hashes would work here, but spo is chosen arbitrarily.
220        f.write_str(" Edge Properties:\n")?;
221
222        // Construct: [Ulid] -> [u64] (SPO Edge hash)
223        let ulid_to_spo_edge_hash = self
224            .spo_data
225            .iter()
226            .map(|r| r.map_err(|_| std::fmt::Error))
227            .collect::<Result<Vec<_>, _>>()?
228            .into_iter()
229            .map(|(k, v)| {
230                let hash;
231                {
232                    let mut hash_builder = std::hash::DefaultHasher::new();
233                    k.as_ref().hash(&mut hash_builder);
234                    hash = hash_builder.finish();
235                }
236                (v.clone(), hash)
237            })
238            .collect::<HashMap<_, _>>();
239
240        // Use [Ulid] -> u64 on the keys of edge_props: [Ulid -> & Edge Properties] to produce:
241        //
242        //  [u64] -> [& Edge Properties]
243        //
244        // By using BTreeMap here, we get a nice print order.
245        let hash_to_edge_data = self
246            .edge_props
247            .iter()
248            .map(|r| r.map_err(|_| std::fmt::Error))
249            .collect::<Result<Vec<_>, _>>()?
250            .into_iter()
251            .map(|(ulid, edge_data)| match ulid_to_spo_edge_hash.get(&ulid) {
252                Some(hash) => (Some(hash), edge_data),
253                None => (None, edge_data),
254            })
255            .collect::<BTreeMap<_, _>>();
256
257        for (hash, node_props) in hash_to_edge_data {
258            match hash {
259                None => {
260                    f.write_fmt(format_args!("  _ -> {:?}\n", node_props))?;
261                }
262                Some(hash) => {
263                    f.write_fmt(format_args!("  {:#016x} -> {:?}\n", hash, node_props))?;
264                }
265            }
266        }
267
268        f.write_str(" Edges (SPO):\n")?;
269        for r in self.spo_data.iter() {
270            let (triple, ulid) = r.map_err(|_| std::fmt::Error)?;
271
272            let triple =
273                Id::decode_spo_triple(&triple[..].try_into().map_err(|_| std::fmt::Error)?);
274            f.write_fmt(format_args!(
275                "  ({}, {}, {}) -> ",
276                triple.sub, triple.pred, triple.obj
277            ))?;
278            match ulid_to_spo_edge_hash.get(&ulid) {
279                Some(hash) => {
280                    f.write_fmt(format_args!("{:#016x}\n", hash))?;
281                }
282                None => {
283                    f.write_str("_\n")?;
284                }
285            }
286        }
287
288        f.write_str(" Edges (POS):\n")?;
289        for r in self.pos_data.iter() {
290            let (triple, ulid) = r.map_err(|_| std::fmt::Error)?;
291
292            let triple =
293                Id::decode_pos_triple(&triple[..].try_into().map_err(|_| std::fmt::Error)?);
294            f.write_fmt(format_args!(
295                "  ({}, {}, {}) -> ",
296                triple.sub, triple.pred, triple.obj
297            ))?;
298            match ulid_to_spo_edge_hash.get(&ulid) {
299                Some(hash) => {
300                    f.write_fmt(format_args!("{:#016x}\n", hash))?;
301                }
302                None => {
303                    f.write_str("_\n")?;
304                }
305            }
306        }
307
308        f.write_str(" Edges (OSP):\n")?;
309        for r in self.osp_data.iter() {
310            let (triple, ulid) = r.map_err(|_| std::fmt::Error)?;
311
312            let triple =
313                Id::decode_osp_triple(&triple[..].try_into().map_err(|_| std::fmt::Error)?);
314            f.write_fmt(format_args!(
315                "  ({}, {}, {}) -> ",
316                triple.sub, triple.pred, triple.obj
317            ))?;
318            match ulid_to_spo_edge_hash.get(&ulid) {
319                Some(hash) => {
320                    f.write_fmt(format_args!("{:#016x}\n", hash))?;
321                }
322                None => {
323                    f.write_str("_\n")?;
324                }
325            }
326        }
327        Ok(())
328    }
329}
330
/// Test helper: creates a temporary directory (prefix `SledTripleStore`) and opens a sled
/// database at its path.
///
/// The `TempDir` handle is returned together with the database so that the caller controls
/// how long the directory lives.
#[cfg(test)]
pub(crate) fn create_test_db() -> Result<(tempdir::TempDir, sled::Db), sled::Error> {
    let dir = tempdir::TempDir::new("SledTripleStore")?;
    sled::open(dir.path()).map(|db| (dir, db))
}