simple_triplestore/mem.rs
1use crate::{
2 prelude::*,
3 traits::{IdType, Property},
4 IdGenerator,
5};
6use std::{
7 collections::{BTreeMap, HashMap, HashSet},
8 hash::{Hash, Hasher},
9};
10
11mod extend;
12mod insert;
13mod iter;
14mod merge;
15mod query;
16mod remove;
17mod set;
18
19/// A triple store implemented entirely in memory using [BTreeMap][std::collections::BTreeMap].
20///
21/// # Example
22/// ```
23/// # use ulid::Ulid;
24/// # use simple_triplestore::{prelude::*, MemTripleStore, PropsTriple, Triple, UlidIdGenerator, EdgeOrder};
25/// let mut db = MemTripleStore::new(UlidIdGenerator::new());
26///
27/// // Get some identifiers. These will probably come from an index such as `Readable Name -> Ulid`
28/// let node_1 = Ulid(123);
29/// let node_2 = Ulid(456);
30/// let node_3 = Ulid(789);
31/// let edge = Ulid(999);
32///
33/// // We can insert nodes and edges with user-defined property types.
34/// // For a given TripleStore we can have one type for Nodes and one for Edges.
35/// db.insert_node(node_1, "foo".to_string())?;
36/// db.insert_node(node_2, "bar".to_string())?;
37/// db.insert_node(node_3, "baz".to_string())?;
38/// db.insert_edge(Triple{sub: node_1, pred: edge, obj: node_2}, Vec::from([1,2,3]))?;
39/// db.insert_edge(Triple{sub: node_1, pred: edge, obj: node_3}, Vec::from([4,5,6]))?;
40///
41/// // Three vertices with correct properties.
42/// assert_eq!(
43/// db.iter_vertices()
44/// .map(|r| r.expect("ok"))
45/// .collect::<Vec<_>>(),
46/// [
47/// (node_1, "foo".to_string()),
48/// (node_2, "bar".to_string()),
49/// (node_3, "baz".to_string())
50/// ]
51/// );
52///
53/// // Two edges with the correct properties.
54/// assert_eq!(
55/// db.iter_edges_with_props(EdgeOrder::default())
56/// .map(|r| r.expect("ok"))
57/// .collect::<Vec<_>>(),
58/// [
59/// PropsTriple{
60/// sub: (node_1, "foo".to_string()),
61/// pred: (edge, Vec::from([1,2,3])),
62/// obj: (node_2, "bar".to_string())},
63/// PropsTriple{
64/// sub: (node_1, "foo".to_string()),
65/// pred: (edge, Vec::from([4,5,6])),
66/// obj: (node_3, "baz".to_string())}
67/// ]
68/// );
69/// # Ok::<(), ()>(())
70/// ```
71///
72/// We can do arbitrary queries, e.g.:
73/// ```
74/// # use ulid::Ulid;
75/// # use simple_triplestore::{prelude::*, MemTripleStore, PropsTriple, Triple, UlidIdGenerator, EdgeOrder, QueryError};
76/// # let mut db = MemTripleStore::new(UlidIdGenerator::new());
77/// # let node_1 = Ulid(123);
78/// # let node_2 = Ulid(456);
79/// # let node_3 = Ulid(789);
80/// # let edge = Ulid(999);
81/// # db.insert_node(node_1, "foo".to_string()).expect("ok");
82/// # db.insert_node(node_2, "bar".to_string()).expect("ok");
83/// # db.insert_node(node_3, "baz".to_string()).expect("ok");
84/// # db.insert_edge(Triple{sub: node_1, pred: edge, obj: node_2}, Vec::from([1,2,3])).expect("ok");
85/// # db.insert_edge(Triple{sub: node_1, pred: edge, obj: node_3}, Vec::from([4,5,6])).expect("ok");
86/// // 1. Edges where node_3 is the object.
87/// assert_eq!(
88/// db.run(query!{ ? -?-> [node_3] })?
89/// .iter_edges(EdgeOrder::default())
90/// .map(|r| r.expect("ok"))
91/// .collect::<Vec<_>>(),
92/// [
93/// (Triple{sub: node_1, pred: edge, obj: node_3}, Vec::from([4,5,6])),
94/// ]
95/// );
96///
97/// // Edges with `edge` as the predicate.
98/// assert_eq!(
99/// db.run(query!{ ? -[edge]-> ? })?
100/// .iter_edges(EdgeOrder::default())
101/// .map(|r| r.expect("ok"))
102/// .collect::<Vec<_>>(),
103/// [
104/// (Triple{sub: node_1, pred: edge, obj: node_2}, Vec::from([1,2,3])),
105/// (Triple{sub: node_1, pred: edge, obj: node_3}, Vec::from([4,5,6])),
106/// ]
107/// );
108///
109/// # Ok::<(), QueryError<(), ()>>(())
110/// ```
111pub struct MemTripleStore<Id: IdType, NodeProps: Property, EdgeProps: Property> {
112 node_props: BTreeMap<Id, NodeProps>,
113 edge_props: BTreeMap<Id, EdgeProps>,
114 spo_data: BTreeMap<Id::TripleByteArrayType, Id>,
115 pos_data: BTreeMap<Id::TripleByteArrayType, Id>,
116 osp_data: BTreeMap<Id::TripleByteArrayType, Id>,
117 id_generator: Box<dyn IdGenerator<Id>>,
118}
119
120impl<Id: IdType, NodeProps: Property, EdgeProps: Property> std::fmt::Debug
121 for MemTripleStore<Id, NodeProps, EdgeProps>
122{
123 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
124 f.write_str("MemTripleStore:\n")?;
125 f.write_str(" Node Properties:\n")?;
126 for (id, node_props) in self.node_props.iter() {
127 f.write_fmt(format_args!(" {} -> {:?}\n", id, node_props))?;
128 }
129
130 // When printing edge properties, we display the edge hash instead of the Ulid because it
131 // will be stable across graphs whereas the Ulid is not stable.
132 //
133 // Any of the edge hashes would work here, but spo is chosen arbitrarily.
134 f.write_str(" Edge Properties:\n")?;
135
136 // Construct: [Ulid] -> [u64] (SPO Edge hash)
137 let ulid_to_spo_edge_hash = self
138 .spo_data
139 .iter()
140 .map(|(k, v)| {
141 let hash;
142 {
143 let mut hash_builder = std::hash::DefaultHasher::new();
144 k.hash(&mut hash_builder);
145 hash = hash_builder.finish();
146 }
147 (v.clone(), hash)
148 })
149 .collect::<HashMap<_, _>>();
150
151 // Use [Ulid] -> u64 on the keys of edge_props: [Ulid -> & Edge Properties] to produce:
152 //
153 // [u64] -> [& Edge Properties]
154 //
155 // By using BTreeMap here, we get a nice print order.
156 let hash_to_edge_data = self
157 .edge_props
158 .iter()
159 .map(|(ulid, edge_data)| match ulid_to_spo_edge_hash.get(ulid) {
160 Some(hash) => (Some(hash), edge_data),
161 None => (None, edge_data),
162 })
163 .collect::<BTreeMap<_, _>>();
164
165 for (hash, node_props) in hash_to_edge_data {
166 match hash {
167 None => {
168 f.write_fmt(format_args!(" _ -> {:?}\n", node_props))?;
169 }
170 Some(hash) => {
171 f.write_fmt(format_args!(" {:#016x} -> {:?}\n", hash, node_props))?;
172 }
173 }
174 }
175
176 f.write_str(" Edges (SPO):\n")?;
177 for (triple, ulid) in self.spo_data.iter() {
178 let triple = Id::decode_spo_triple(&triple);
179 f.write_fmt(format_args!(
180 " ({}, {}, {}) -> ",
181 triple.sub, triple.pred, triple.obj
182 ))?;
183 match ulid_to_spo_edge_hash.get(ulid) {
184 Some(hash) => {
185 f.write_fmt(format_args!("{:#016x}\n", hash))?;
186 }
187 None => {
188 f.write_str("_\n")?;
189 }
190 }
191 }
192
193 f.write_str(" Edges (POS):\n")?;
194 for (triple, ulid) in self.pos_data.iter() {
195 let triple = Id::decode_pos_triple(&triple);
196 f.write_fmt(format_args!(
197 " ({}, {}, {}) -> ",
198 triple.sub, triple.pred, triple.obj
199 ))?;
200 match ulid_to_spo_edge_hash.get(ulid) {
201 Some(hash) => {
202 f.write_fmt(format_args!("{:#016x}\n", hash))?;
203 }
204 None => {
205 f.write_str("_\n")?;
206 }
207 }
208 }
209
210 f.write_str(" Edges (OSP):\n")?;
211 for (triple, ulid) in self.osp_data.iter() {
212 let triple = Id::decode_osp_triple(&triple);
213 f.write_fmt(format_args!(
214 " ({}, {}, {}) -> ",
215 triple.sub, triple.pred, triple.obj
216 ))?;
217 match ulid_to_spo_edge_hash.get(ulid) {
218 Some(hash) => {
219 f.write_fmt(format_args!("{:#016x}\n", hash))?;
220 }
221 None => {
222 f.write_str("_\n")?;
223 }
224 }
225 }
226
227 Ok(())
228 }
229}
230
231impl<Id: IdType, NodeProps: Property, EdgeProps: Property> PartialEq
232 for MemTripleStore<Id, NodeProps, EdgeProps>
233{
234 fn eq(&self, other: &Self) -> bool {
235 if !self.node_props.eq(&other.node_props) {
236 return false;
237 }
238
239 // We expect edge data to be identical, so zip them together and test that they match.
240 let mut cached_comparisons: HashSet<(Id, Id)> = HashSet::new();
241 let mut eq_edge_prop_by_id = |self_edge_prop_id, other_edge_prop_id| {
242 let self_edge_props = self.edge_props.get(&self_edge_prop_id);
243 let other_edge_props = other.edge_props.get(&other_edge_prop_id);
244
245 // If either side is missing, we say that the overall result is false.
246 if self_edge_props.is_none() || other_edge_props.is_none() {
247 return false;
248 }
249
250 // If we've seen this before (perhaps on a different ordering), it's true.
251 if cached_comparisons.contains(&(self_edge_prop_id, other_edge_prop_id)) {
252 return true;
253 }
254
255 // Test that the edge properties match.
256 if self_edge_props == other_edge_props {
257 cached_comparisons.insert((self_edge_prop_id, other_edge_prop_id));
258 true
259 } else {
260 false
261 }
262 };
263
264 let mut check_edge =
265 move |((self_edge, self_edge_prop_id), (other_edge, other_edge_prop_id)): (
266 (&Id::TripleByteArrayType, &Id),
267 (&Id::TripleByteArrayType, &Id),
268 )| {
269 // Test the Keys
270 if self_edge.ne(other_edge) {
271 return false;
272 }
273 // Test the Values
274 eq_edge_prop_by_id(self_edge_prop_id.clone(), other_edge_prop_id.clone())
275 };
276
277 // SPO
278 for edge_pair in self.spo_data.iter().zip(other.spo_data.iter()) {
279 if !check_edge(edge_pair) {
280 return false;
281 }
282 }
283
284 true
285 }
286}
287
288impl<Id: IdType, NodeProps: Property, EdgeProps: Property>
289 MemTripleStore<Id, NodeProps, EdgeProps>
290{
291 pub fn new(id_generator: impl IdGenerator<Id> + 'static) -> Self {
292 Self::new_from_boxed_id_generator(Box::new(id_generator))
293 }
294
295 pub(crate) fn new_from_boxed_id_generator(
296 id_generator: Box<dyn IdGenerator<Id> + 'static>,
297 ) -> Self {
298 Self {
299 node_props: BTreeMap::new(),
300 edge_props: BTreeMap::new(),
301 spo_data: BTreeMap::new(),
302 pos_data: BTreeMap::new(),
303 osp_data: BTreeMap::new(),
304 id_generator: id_generator,
305 }
306 }
307}
308
309impl<Id: IdType, NodeProps: Property, EdgeProps: Property> TripleStore<Id, NodeProps, EdgeProps>
310 for MemTripleStore<Id, NodeProps, EdgeProps>
311{
312}
313
314impl<Id: IdType, NodeProps: Property, EdgeProps: Property> TripleStoreError
315 for MemTripleStore<Id, NodeProps, EdgeProps>
316{
317 type Error = ();
318}