simple_triplestore/
mem.rs

1use crate::{
2    prelude::*,
3    traits::{IdType, Property},
4    IdGenerator,
5};
6use std::{
7    collections::{BTreeMap, HashMap, HashSet},
8    hash::{Hash, Hasher},
9};
10
11mod extend;
12mod insert;
13mod iter;
14mod merge;
15mod query;
16mod remove;
17mod set;
18
19/// A triple store implemented entirely in memory using [BTreeMap][std::collections::BTreeMap].
20///
21/// # Example
22/// ```
23/// # use ulid::Ulid;
24/// # use simple_triplestore::{prelude::*, MemTripleStore, PropsTriple, Triple, UlidIdGenerator, EdgeOrder};
25/// let mut db = MemTripleStore::new(UlidIdGenerator::new());
26///
27/// // Get some identifiers. These will probably come from an index such as `Readable Name -> Ulid`
28/// let node_1 = Ulid(123);
29/// let node_2 = Ulid(456);
30/// let node_3 = Ulid(789);
31/// let edge = Ulid(999);
32///
33/// // We can insert nodes and edges with user-defined property types.
34/// // For a given TripleStore we can have one type for Nodes and one for Edges.
35/// db.insert_node(node_1, "foo".to_string())?;
36/// db.insert_node(node_2, "bar".to_string())?;
37/// db.insert_node(node_3, "baz".to_string())?;
38/// db.insert_edge(Triple{sub: node_1, pred: edge, obj: node_2}, Vec::from([1,2,3]))?;
39/// db.insert_edge(Triple{sub: node_1, pred: edge, obj: node_3}, Vec::from([4,5,6]))?;
40///
41/// // Three vertices with correct properties.
42/// assert_eq!(
43///   db.iter_vertices()
44///     .map(|r| r.expect("ok"))
45///     .collect::<Vec<_>>(),  
46///   [
47///     (node_1, "foo".to_string()),
48///     (node_2, "bar".to_string()),
49///     (node_3, "baz".to_string())
50///   ]
51/// );
52///
53/// // Two edges with the correct properties.
54/// assert_eq!(
55///   db.iter_edges_with_props(EdgeOrder::default())
56///     .map(|r| r.expect("ok"))
57///     .collect::<Vec<_>>(),
58///   [
59///     PropsTriple{
60///       sub: (node_1, "foo".to_string()),
61///       pred: (edge, Vec::from([1,2,3])),
62///       obj: (node_2, "bar".to_string())},
63///     PropsTriple{
64///       sub: (node_1, "foo".to_string()),
65///       pred: (edge, Vec::from([4,5,6])),
66///       obj: (node_3, "baz".to_string())}
67///   ]
68/// );
69/// # Ok::<(), ()>(())
70/// ```
71///
72/// We can do arbitrary queries, e.g.:
73/// ```
74/// # use ulid::Ulid;
75/// # use simple_triplestore::{prelude::*, MemTripleStore, PropsTriple, Triple, UlidIdGenerator, EdgeOrder, QueryError};
76/// # let mut db = MemTripleStore::new(UlidIdGenerator::new());
77/// # let node_1 = Ulid(123);
78/// # let node_2 = Ulid(456);
79/// # let node_3 = Ulid(789);
80/// # let edge = Ulid(999);
81/// # db.insert_node(node_1, "foo".to_string()).expect("ok");
82/// # db.insert_node(node_2, "bar".to_string()).expect("ok");
83/// # db.insert_node(node_3, "baz".to_string()).expect("ok");
84/// # db.insert_edge(Triple{sub: node_1, pred: edge, obj: node_2}, Vec::from([1,2,3])).expect("ok");
85/// # db.insert_edge(Triple{sub: node_1, pred: edge, obj: node_3}, Vec::from([4,5,6])).expect("ok");
86/// // 1. Edges where node_3 is the object.
87/// assert_eq!(
88///   db.run(query!{ ? -?-> [node_3] })?
89///     .iter_edges(EdgeOrder::default())
90///     .map(|r| r.expect("ok"))
91///     .collect::<Vec<_>>(),
92///   [
93///     (Triple{sub: node_1, pred: edge, obj: node_3}, Vec::from([4,5,6])),
94///   ]
95/// );
96///
97/// // Edges with `edge` as the predicate.
98/// assert_eq!(
99///   db.run(query!{ ? -[edge]-> ? })?
100///     .iter_edges(EdgeOrder::default())
101///     .map(|r| r.expect("ok"))
102///     .collect::<Vec<_>>(),
103///   [
104///     (Triple{sub: node_1, pred: edge, obj: node_2}, Vec::from([1,2,3])),
105///     (Triple{sub: node_1, pred: edge, obj: node_3}, Vec::from([4,5,6])),
106///   ]
107/// );
108///
109/// # Ok::<(), QueryError<(), ()>>(())
110/// ```
111pub struct MemTripleStore<Id: IdType, NodeProps: Property, EdgeProps: Property> {
112    node_props: BTreeMap<Id, NodeProps>,
113    edge_props: BTreeMap<Id, EdgeProps>,
114    spo_data: BTreeMap<Id::TripleByteArrayType, Id>,
115    pos_data: BTreeMap<Id::TripleByteArrayType, Id>,
116    osp_data: BTreeMap<Id::TripleByteArrayType, Id>,
117    id_generator: Box<dyn IdGenerator<Id>>,
118}
119
120impl<Id: IdType, NodeProps: Property, EdgeProps: Property> std::fmt::Debug
121    for MemTripleStore<Id, NodeProps, EdgeProps>
122{
123    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
124        f.write_str("MemTripleStore:\n")?;
125        f.write_str(" Node Properties:\n")?;
126        for (id, node_props) in self.node_props.iter() {
127            f.write_fmt(format_args!("  {} -> {:?}\n", id, node_props))?;
128        }
129
130        // When printing edge properties, we display the edge hash instead of the Ulid because it
131        // will be stable across graphs whereas the Ulid is not stable.
132        //
133        // Any of the edge hashes would work here, but spo is chosen arbitrarily.
134        f.write_str(" Edge Properties:\n")?;
135
136        // Construct: [Ulid] -> [u64] (SPO Edge hash)
137        let ulid_to_spo_edge_hash = self
138            .spo_data
139            .iter()
140            .map(|(k, v)| {
141                let hash;
142                {
143                    let mut hash_builder = std::hash::DefaultHasher::new();
144                    k.hash(&mut hash_builder);
145                    hash = hash_builder.finish();
146                }
147                (v.clone(), hash)
148            })
149            .collect::<HashMap<_, _>>();
150
151        // Use [Ulid] -> u64 on the keys of edge_props: [Ulid -> & Edge Properties] to produce:
152        //
153        //  [u64] -> [& Edge Properties]
154        //
155        // By using BTreeMap here, we get a nice print order.
156        let hash_to_edge_data = self
157            .edge_props
158            .iter()
159            .map(|(ulid, edge_data)| match ulid_to_spo_edge_hash.get(ulid) {
160                Some(hash) => (Some(hash), edge_data),
161                None => (None, edge_data),
162            })
163            .collect::<BTreeMap<_, _>>();
164
165        for (hash, node_props) in hash_to_edge_data {
166            match hash {
167                None => {
168                    f.write_fmt(format_args!("  _ -> {:?}\n", node_props))?;
169                }
170                Some(hash) => {
171                    f.write_fmt(format_args!("  {:#016x} -> {:?}\n", hash, node_props))?;
172                }
173            }
174        }
175
176        f.write_str(" Edges (SPO):\n")?;
177        for (triple, ulid) in self.spo_data.iter() {
178            let triple = Id::decode_spo_triple(&triple);
179            f.write_fmt(format_args!(
180                "  ({}, {}, {}) -> ",
181                triple.sub, triple.pred, triple.obj
182            ))?;
183            match ulid_to_spo_edge_hash.get(ulid) {
184                Some(hash) => {
185                    f.write_fmt(format_args!("{:#016x}\n", hash))?;
186                }
187                None => {
188                    f.write_str("_\n")?;
189                }
190            }
191        }
192
193        f.write_str(" Edges (POS):\n")?;
194        for (triple, ulid) in self.pos_data.iter() {
195            let triple = Id::decode_pos_triple(&triple);
196            f.write_fmt(format_args!(
197                "  ({}, {}, {}) -> ",
198                triple.sub, triple.pred, triple.obj
199            ))?;
200            match ulid_to_spo_edge_hash.get(ulid) {
201                Some(hash) => {
202                    f.write_fmt(format_args!("{:#016x}\n", hash))?;
203                }
204                None => {
205                    f.write_str("_\n")?;
206                }
207            }
208        }
209
210        f.write_str(" Edges (OSP):\n")?;
211        for (triple, ulid) in self.osp_data.iter() {
212            let triple = Id::decode_osp_triple(&triple);
213            f.write_fmt(format_args!(
214                "  ({}, {}, {}) -> ",
215                triple.sub, triple.pred, triple.obj
216            ))?;
217            match ulid_to_spo_edge_hash.get(ulid) {
218                Some(hash) => {
219                    f.write_fmt(format_args!("{:#016x}\n", hash))?;
220                }
221                None => {
222                    f.write_str("_\n")?;
223                }
224            }
225        }
226
227        Ok(())
228    }
229}
230
231impl<Id: IdType, NodeProps: Property, EdgeProps: Property> PartialEq
232    for MemTripleStore<Id, NodeProps, EdgeProps>
233{
234    fn eq(&self, other: &Self) -> bool {
235        if !self.node_props.eq(&other.node_props) {
236            return false;
237        }
238
239        // We expect edge data to be identical, so zip them together and test that they match.
240        let mut cached_comparisons: HashSet<(Id, Id)> = HashSet::new();
241        let mut eq_edge_prop_by_id = |self_edge_prop_id, other_edge_prop_id| {
242            let self_edge_props = self.edge_props.get(&self_edge_prop_id);
243            let other_edge_props = other.edge_props.get(&other_edge_prop_id);
244
245            // If either side is missing, we say that the overall result is false.
246            if self_edge_props.is_none() || other_edge_props.is_none() {
247                return false;
248            }
249
250            // If we've seen this before (perhaps on a different ordering), it's true.
251            if cached_comparisons.contains(&(self_edge_prop_id, other_edge_prop_id)) {
252                return true;
253            }
254
255            // Test that the edge properties match.
256            if self_edge_props == other_edge_props {
257                cached_comparisons.insert((self_edge_prop_id, other_edge_prop_id));
258                true
259            } else {
260                false
261            }
262        };
263
264        let mut check_edge =
265            move |((self_edge, self_edge_prop_id), (other_edge, other_edge_prop_id)): (
266                (&Id::TripleByteArrayType, &Id),
267                (&Id::TripleByteArrayType, &Id),
268            )| {
269                // Test the Keys
270                if self_edge.ne(other_edge) {
271                    return false;
272                }
273                // Test the Values
274                eq_edge_prop_by_id(self_edge_prop_id.clone(), other_edge_prop_id.clone())
275            };
276
277        // SPO
278        for edge_pair in self.spo_data.iter().zip(other.spo_data.iter()) {
279            if !check_edge(edge_pair) {
280                return false;
281            }
282        }
283
284        true
285    }
286}
287
288impl<Id: IdType, NodeProps: Property, EdgeProps: Property>
289    MemTripleStore<Id, NodeProps, EdgeProps>
290{
291    pub fn new(id_generator: impl IdGenerator<Id> + 'static) -> Self {
292        Self::new_from_boxed_id_generator(Box::new(id_generator))
293    }
294
295    pub(crate) fn new_from_boxed_id_generator(
296        id_generator: Box<dyn IdGenerator<Id> + 'static>,
297    ) -> Self {
298        Self {
299            node_props: BTreeMap::new(),
300            edge_props: BTreeMap::new(),
301            spo_data: BTreeMap::new(),
302            pos_data: BTreeMap::new(),
303            osp_data: BTreeMap::new(),
304            id_generator: id_generator,
305        }
306    }
307}
308
309impl<Id: IdType, NodeProps: Property, EdgeProps: Property> TripleStore<Id, NodeProps, EdgeProps>
310    for MemTripleStore<Id, NodeProps, EdgeProps>
311{
312}
313
314impl<Id: IdType, NodeProps: Property, EdgeProps: Property> TripleStoreError
315    for MemTripleStore<Id, NodeProps, EdgeProps>
316{
317    type Error = ();
318}