simple_triplestore/sled.rs
1use std::{
2 collections::{BTreeMap, HashMap},
3 hash::{Hash, Hasher},
4};
5
6use crate::{
7 prelude::*,
8 traits::{IdType, Property},
9 IdGenerator,
10};
11use serde::{de::DeserializeOwned, Serialize};
12
13mod extend;
14mod insert;
15mod iter;
16mod merge;
17mod query;
18mod remove;
19
/// Errors surfaced by the sled-backed triple store.
///
/// The two wrapping variants have `From` conversions in this file, so `?` can be used directly on
/// `sled` and `bincode` results.
#[derive(Debug)]
pub enum SledTripleStoreError {
    /// An error reported by the underlying `sled` database.
    SledError(sled::Error),
    /// An error reported by `bincode` while encoding or decoding a stored value.
    SerializationError(bincode::Error),
    /// NOTE(review): presumably raised when a stored key does not have the expected byte length —
    /// confirm against the modules that construct it.
    KeySizeError,
    /// NOTE(review): presumably raised when an edge or node is referenced but its property record
    /// is absent — confirm against the modules that construct it.
    MissingPropertyData,
}
27
28impl From<sled::Error> for SledTripleStoreError {
29 fn from(e: sled::Error) -> Self {
30 SledTripleStoreError::SledError(e)
31 }
32}
33
34impl From<bincode::Error> for SledTripleStoreError {
35 fn from(e: bincode::Error) -> Self {
36 SledTripleStoreError::SerializationError(e)
37 }
38}
39
40/// A triplestore which is backed by [sled](https://sled.rs).
41///
42/// # Example
43/// ```
44/// # use ulid::Ulid;
45/// # use simple_triplestore::{prelude::*, SledTripleStore, PropsTriple, Triple, UlidIdGenerator, EdgeOrder};
46/// let temp_dir = tempdir::TempDir::new("sled").unwrap();
47/// let sled_db = sled::open(temp_dir.path()).unwrap();
48///
49/// let mut db = SledTripleStore::new(&sled_db, UlidIdGenerator::new())?;
50///
51/// // Get some identifiers. These will probably come from an index such as `Readable Name -> Ulid`
52/// let node_1 = Ulid(123);
53/// let node_2 = Ulid(456);
54/// let node_3 = Ulid(789);
55/// let edge = Ulid(999);
56///
57/// // We can insert nodes and edges with user-defined property types.
58/// // For a given TripleStore we can have one type for Nodes and one for Edges.
59/// db.insert_node(node_1, "foo".to_string())?;
60/// db.insert_node(node_2, "bar".to_string())?;
61/// db.insert_node(node_3, "baz".to_string())?;
62/// db.insert_edge(Triple{sub: node_1, pred: edge, obj: node_2}, Vec::from([1,2,3]))?;
63/// db.insert_edge(Triple{sub: node_1, pred: edge, obj: node_3}, Vec::from([4,5,6]))?;
64///
65/// // Three vertices with correct properties.
66/// assert_eq!(
67/// db.iter_vertices()
68/// .map(|r| r.expect("ok"))
69/// .collect::<Vec<_>>(),
70/// [
71/// (node_1, "foo".to_string()),
72/// (node_2, "bar".to_string()),
73/// (node_3, "baz".to_string())
74/// ]
75/// );
76///
77/// // Two edges with the correct properties.
78/// assert_eq!(
79/// db.iter_edges_with_props(EdgeOrder::default())
80/// .map(|r| r.expect("ok"))
81/// .collect::<Vec<_>>(),
82/// [
83/// PropsTriple{
84/// sub: (node_1, "foo".to_string()),
85/// pred: (edge, Vec::from([1,2,3])),
86/// obj: (node_2, "bar".to_string())},
87/// PropsTriple{
88/// sub: (node_1, "foo".to_string()),
89/// pred: (edge, Vec::from([4,5,6])),
90/// obj: (node_3, "baz".to_string())}
91/// ]
92/// );
93/// # Ok::<(), simple_triplestore::sled::SledTripleStoreError>(())
94/// ```
95///
96/// We can do arbitrary queries, e.g.:
97/// ```
98/// # use ulid::Ulid;
99/// # use simple_triplestore::{prelude::*, SledTripleStore, PropsTriple, Triple, UlidIdGenerator, EdgeOrder, QueryError};
100/// # let temp_dir = tempdir::TempDir::new("sled").unwrap();
101/// # let sled_db = sled::open(temp_dir.path()).unwrap();
102/// # let mut db = SledTripleStore::new(&sled_db, UlidIdGenerator::new()).unwrap();
103/// # let node_1 = Ulid(123);
104/// # let node_2 = Ulid(456);
105/// # let node_3 = Ulid(789);
106/// # let edge = Ulid(999);
107/// # db.insert_node(node_1, "foo".to_string()).unwrap();
108/// # db.insert_node(node_2, "bar".to_string()).unwrap();
109/// # db.insert_node(node_3, "baz".to_string()).unwrap();
110/// # db.insert_edge(Triple{sub: node_1, pred: edge, obj: node_2}, Vec::from([1,2,3])).unwrap();
111/// # db.insert_edge(Triple{sub: node_1, pred: edge, obj: node_3}, Vec::from([4,5,6])).unwrap();
/// // Edges where node_3 is the object.
113/// assert_eq!(
114/// db.run(query!{ ? -?-> [node_3] })?
115/// .iter_edges(EdgeOrder::default())
116/// .map(|r| r.expect("ok"))
117/// .collect::<Vec<_>>(),
118/// [
119/// (Triple{sub: node_1, pred: edge, obj: node_3}, Vec::from([4,5,6])),
120/// ]
121/// );
122///
123/// // Edges with `edge` as the predicate.
124/// assert_eq!(
125/// db.run(query!{ ? -[edge]-> ? })?
126/// .iter_edges(EdgeOrder::default())
127/// .map(|r| r.expect("ok"))
128/// .collect::<Vec<_>>(),
129/// [
130/// (Triple{sub: node_1, pred: edge, obj: node_2}, Vec::from([1,2,3])),
131/// (Triple{sub: node_1, pred: edge, obj: node_3}, Vec::from([4,5,6])),
132/// ]
133/// );
134///
135/// # Ok::<(), QueryError<simple_triplestore::sled::SledTripleStoreError, ()>>(())
136/// ```
137pub struct SledTripleStore<
138 Id: IdType,
139 NodeProps: Property + Serialize + DeserializeOwned,
140 EdgeProps: Serialize + DeserializeOwned,
141> {
142 _phantom: std::marker::PhantomData<(Id, NodeProps, EdgeProps)>,
143 node_props: sled::Tree,
144 edge_props: sled::Tree,
145 spo_data: sled::Tree,
146 pos_data: sled::Tree,
147 osp_data: sled::Tree,
148 id_generator: Box<dyn IdGenerator<Id>>,
149}
150
151impl<
152 Id: IdType,
153 NodeProps: Property + Serialize + DeserializeOwned,
154 EdgeProps: Property + Serialize + DeserializeOwned,
155 > SledTripleStore<Id, NodeProps, EdgeProps>
156{
157 pub fn new(
158 db: &sled::Db,
159 id_generator: impl IdGenerator<Id> + 'static,
160 ) -> Result<Self, SledTripleStoreError> {
161 let node_data = db
162 .open_tree(b"node_data")
163 .map_err(|e| SledTripleStoreError::SledError(e))?;
164 let edge_data = db.open_tree(b"edge_data")?;
165 let spo_data = db.open_tree(b"spo_data")?;
166 let pos_data = db.open_tree(b"pos_data")?;
167 let osp_data = db.open_tree(b"osp_data")?;
168
169 Ok(Self {
170 node_props: node_data,
171 edge_props: edge_data,
172 spo_data,
173 pos_data,
174 osp_data,
175 id_generator: Box::new(id_generator),
176 _phantom: std::marker::PhantomData,
177 })
178 }
179}
180
181impl<
182 Id: IdType,
183 NodeProps: Property + Serialize + DeserializeOwned,
184 EdgeProps: Property + Serialize + DeserializeOwned,
185 > TripleStoreError for SledTripleStore<Id, NodeProps, EdgeProps>
186{
187 type Error = SledTripleStoreError;
188}
189
190impl<
191 Id: IdType,
192 NodeProps: Property + Serialize + DeserializeOwned,
193 EdgeProps: Property + Serialize + DeserializeOwned,
194 > TripleStore<Id, NodeProps, EdgeProps> for SledTripleStore<Id, NodeProps, EdgeProps>
195{
196}
197
198impl<
199 Id: IdType,
200 NodeProps: Property + Serialize + DeserializeOwned,
201 EdgeProps: Property + Serialize + DeserializeOwned,
202 > std::fmt::Debug for SledTripleStore<Id, NodeProps, EdgeProps>
203{
204 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
205 f.write_str("SledTripleStore:\n")?;
206 f.write_str(" Node Properties:\n")?;
207 for r in self.node_props.iter() {
208 let (id, node_props) = r.map_err(|_| std::fmt::Error)?;
209 f.write_fmt(format_args!(
210 " {} -> {:?}\n",
211 Id::try_from_be_bytes(&id).ok_or(std::fmt::Error)?,
212 bincode::deserialize(&node_props).map_err(|_| std::fmt::Error)?
213 ))?;
214 }
215
216 // When printing edge properties, we display the edge hash instead of the Ulid because it
217 // will be stable across graphs whereas the Ulid is not stable.
218 //
219 // Any of the edge hashes would work here, but spo is chosen arbitrarily.
220 f.write_str(" Edge Properties:\n")?;
221
222 // Construct: [Ulid] -> [u64] (SPO Edge hash)
223 let ulid_to_spo_edge_hash = self
224 .spo_data
225 .iter()
226 .map(|r| r.map_err(|_| std::fmt::Error))
227 .collect::<Result<Vec<_>, _>>()?
228 .into_iter()
229 .map(|(k, v)| {
230 let hash;
231 {
232 let mut hash_builder = std::hash::DefaultHasher::new();
233 k.as_ref().hash(&mut hash_builder);
234 hash = hash_builder.finish();
235 }
236 (v.clone(), hash)
237 })
238 .collect::<HashMap<_, _>>();
239
240 // Use [Ulid] -> u64 on the keys of edge_props: [Ulid -> & Edge Properties] to produce:
241 //
242 // [u64] -> [& Edge Properties]
243 //
244 // By using BTreeMap here, we get a nice print order.
245 let hash_to_edge_data = self
246 .edge_props
247 .iter()
248 .map(|r| r.map_err(|_| std::fmt::Error))
249 .collect::<Result<Vec<_>, _>>()?
250 .into_iter()
251 .map(|(ulid, edge_data)| match ulid_to_spo_edge_hash.get(&ulid) {
252 Some(hash) => (Some(hash), edge_data),
253 None => (None, edge_data),
254 })
255 .collect::<BTreeMap<_, _>>();
256
257 for (hash, node_props) in hash_to_edge_data {
258 match hash {
259 None => {
260 f.write_fmt(format_args!(" _ -> {:?}\n", node_props))?;
261 }
262 Some(hash) => {
263 f.write_fmt(format_args!(" {:#016x} -> {:?}\n", hash, node_props))?;
264 }
265 }
266 }
267
268 f.write_str(" Edges (SPO):\n")?;
269 for r in self.spo_data.iter() {
270 let (triple, ulid) = r.map_err(|_| std::fmt::Error)?;
271
272 let triple =
273 Id::decode_spo_triple(&triple[..].try_into().map_err(|_| std::fmt::Error)?);
274 f.write_fmt(format_args!(
275 " ({}, {}, {}) -> ",
276 triple.sub, triple.pred, triple.obj
277 ))?;
278 match ulid_to_spo_edge_hash.get(&ulid) {
279 Some(hash) => {
280 f.write_fmt(format_args!("{:#016x}\n", hash))?;
281 }
282 None => {
283 f.write_str("_\n")?;
284 }
285 }
286 }
287
288 f.write_str(" Edges (POS):\n")?;
289 for r in self.pos_data.iter() {
290 let (triple, ulid) = r.map_err(|_| std::fmt::Error)?;
291
292 let triple =
293 Id::decode_pos_triple(&triple[..].try_into().map_err(|_| std::fmt::Error)?);
294 f.write_fmt(format_args!(
295 " ({}, {}, {}) -> ",
296 triple.sub, triple.pred, triple.obj
297 ))?;
298 match ulid_to_spo_edge_hash.get(&ulid) {
299 Some(hash) => {
300 f.write_fmt(format_args!("{:#016x}\n", hash))?;
301 }
302 None => {
303 f.write_str("_\n")?;
304 }
305 }
306 }
307
308 f.write_str(" Edges (OSP):\n")?;
309 for r in self.osp_data.iter() {
310 let (triple, ulid) = r.map_err(|_| std::fmt::Error)?;
311
312 let triple =
313 Id::decode_osp_triple(&triple[..].try_into().map_err(|_| std::fmt::Error)?);
314 f.write_fmt(format_args!(
315 " ({}, {}, {}) -> ",
316 triple.sub, triple.pred, triple.obj
317 ))?;
318 match ulid_to_spo_edge_hash.get(&ulid) {
319 Some(hash) => {
320 f.write_fmt(format_args!("{:#016x}\n", hash))?;
321 }
322 None => {
323 f.write_str("_\n")?;
324 }
325 }
326 }
327 Ok(())
328 }
329}
330
/// Test helper: opens a sled database inside a fresh temporary directory.
///
/// The `TempDir` is returned alongside the database so the caller decides how long the directory
/// lives.
#[cfg(test)]
pub(crate) fn create_test_db() -> Result<(tempdir::TempDir, sled::Db), sled::Error> {
    let scratch = tempdir::TempDir::new("SledTripleStore")?;
    sled::open(scratch.path()).map(|db| (scratch, db))
}