terminus_store/layer/
layer.rs

1//! Common data structures and traits for all layer types.
2use std::collections::HashMap;
3use std::hash::Hash;
4
5use tdb_succinct::{TdbDataType, TypedDictEntry};
6
7/// A layer containing dictionary entries and triples.
8///
9/// A layer can be queried. To answer queries, layers will check their
10/// own data structures, and if they have a parent, the parent is
11/// queried as well.
12pub trait Layer: Send + Sync {
13    /// The name of this layer.
14    fn name(&self) -> [u32; 5];
15    fn parent_name(&self) -> Option<[u32; 5]>;
16
17    /// The amount of nodes and values known to this layer.
18    /// This also counts entries in the parent.
19    fn node_and_value_count(&self) -> usize;
20    /// The amount of predicates known to this layer.
21    /// This also counts entries in the parent.
22    fn predicate_count(&self) -> usize;
23
24    /// The numerical id of a subject, or None if the subject cannot be found.
25    fn subject_id(&self, subject: &str) -> Option<u64>;
26    /// The numerical id of a predicate, or None if the predicate cannot be found.
27    fn predicate_id(&self, predicate: &str) -> Option<u64>;
28    /// The numerical id of a node object, or None if the node object cannot be found.
29    fn object_node_id(&self, object: &str) -> Option<u64>;
30    /// The numerical id of a value object, or None if the value object cannot be found.
31    fn object_value_id(&self, object: &TypedDictEntry) -> Option<u64>;
32    /// The subject corresponding to a numerical id, or None if it cannot be found.
33    fn id_subject(&self, id: u64) -> Option<String>;
34
35    /// The predicate corresponding to a numerical id, or None if it cannot be found.
36    fn id_predicate(&self, id: u64) -> Option<String>;
37    /// The object corresponding to a numerical id, or None if it cannot be found.
38    fn id_object(&self, id: u64) -> Option<ObjectType>;
39
40    /// The object node corresponding to a numerical id, or None if it cannot be found. Panics if the object is actually a value.
41    fn id_object_node(&self, id: u64) -> Option<String> {
42        self.id_object(id).map(|o| {
43            o.node()
44                .expect("Expected ObjectType to be node but got a value")
45        })
46    }
47
48    /// The object value corresponding to a numerical id, or None if it cannot be found. Panics if the object is actually a node.
49    fn id_object_value(&self, id: u64) -> Option<TypedDictEntry> {
50        self.id_object(id).map(|o| {
51            o.value()
52                .expect("Expected ObjectType to be value but got a node")
53        })
54    }
55
56    /// Check if the given id refers to a node.
57    ///
58    /// This will return None if the id doesn't refer to anything.
59    fn id_object_is_node(&self, id: u64) -> Option<bool>;
60
61    /// Check if the given id refers to a value.
62    ///
63    /// This will return None if the id doesn't refer to anything.
64    fn id_object_is_value(&self, id: u64) -> Option<bool> {
65        self.id_object_is_node(id).map(|v| !v)
66    }
67
68    /// Create a struct with all the counts
69    fn all_counts(&self) -> LayerCounts;
70
71    /// Return a clone of this layer in a box.
72    fn clone_boxed(&self) -> Box<dyn Layer>;
73
74    /// Returns true if the given triple exists, and false otherwise.
75    fn triple_exists(&self, subject: u64, predicate: u64, object: u64) -> bool;
76
77    /// Returns true if the given triple exists, and false otherwise.
78    fn id_triple_exists(&self, triple: IdTriple) -> bool {
79        self.triple_exists(triple.subject, triple.predicate, triple.object)
80    }
81
82    /// Returns true if the given triple exists, and false otherwise.
83    fn value_triple_exists(&self, triple: &ValueTriple) -> bool {
84        self.value_triple_to_id(triple)
85            .map(|t| self.id_triple_exists(t))
86            .unwrap_or(false)
87    }
88
89    /// Iterator over all triples known to this layer.
90    fn triples(&self) -> Box<dyn Iterator<Item = IdTriple> + Send>;
91
92    fn triples_s(&self, subject: u64) -> Box<dyn Iterator<Item = IdTriple> + Send>;
93    fn triples_sp(&self, subject: u64, predicate: u64)
94        -> Box<dyn Iterator<Item = IdTriple> + Send>;
95
96    /// Convert a `ValueTriple` to an `IdTriple`, returning None if any of the strings in the triple could not be resolved.
97    fn value_triple_to_id(&self, triple: &ValueTriple) -> Option<IdTriple> {
98        self.subject_id(&triple.subject).and_then(|subject| {
99            self.predicate_id(&triple.predicate).and_then(|predicate| {
100                match &triple.object {
101                    ObjectType::Node(node) => self.object_node_id(&node),
102                    ObjectType::Value(value) => self.object_value_id(&value),
103                }
104                .map(|object| IdTriple {
105                    subject,
106                    predicate,
107                    object,
108                })
109            })
110        })
111    }
112
113    fn triples_p(&self, predicate: u64) -> Box<dyn Iterator<Item = IdTriple> + Send>;
114
115    fn triples_o(&self, object: u64) -> Box<dyn Iterator<Item = IdTriple> + Send>;
116
117    /// Convert all known strings in the given string triple to ids.
118    fn value_triple_to_partially_resolved(&self, triple: ValueTriple) -> PartiallyResolvedTriple {
119        PartiallyResolvedTriple {
120            subject: self
121                .subject_id(&triple.subject)
122                .map(PossiblyResolved::Resolved)
123                .unwrap_or(PossiblyResolved::Unresolved(triple.subject)),
124            predicate: self
125                .predicate_id(&triple.predicate)
126                .map(PossiblyResolved::Resolved)
127                .unwrap_or(PossiblyResolved::Unresolved(triple.predicate)),
128            object: match &triple.object {
129                ObjectType::Node(node) => self
130                    .object_node_id(&node)
131                    .map(PossiblyResolved::Resolved)
132                    .unwrap_or(PossiblyResolved::Unresolved(triple.object)),
133                ObjectType::Value(value) => self
134                    .object_value_id(&value)
135                    .map(PossiblyResolved::Resolved)
136                    .unwrap_or(PossiblyResolved::Unresolved(triple.object)),
137            },
138        }
139    }
140
141    /// Convert an id triple to the corresponding string version, returning None if any of those ids could not be converted.
142    fn id_triple_to_string(&self, triple: &IdTriple) -> Option<ValueTriple> {
143        self.id_subject(triple.subject).and_then(|subject| {
144            self.id_predicate(triple.predicate).and_then(|predicate| {
145                self.id_object(triple.object).map(|object| ValueTriple {
146                    subject,
147                    predicate,
148                    object,
149                })
150            })
151        })
152    }
153
154    /// Returns the total amount of triple additions in this layer and all its parents.
155    fn triple_addition_count(&self) -> usize;
156
157    /// Returns the total amount of triple removals in this layer and all its parents.
158    fn triple_removal_count(&self) -> usize;
159
160    /// Returns the total amount of triples in this layer and all its parents.
161    fn triple_count(&self) -> usize {
162        self.triple_addition_count() - self.triple_removal_count()
163    }
164
165    fn single_triple_sp(&self, subject: u64, predicate: u64) -> Option<IdTriple>;
166}
167
/// The dictionary entry counts of a layer, grouped in one struct.
///
/// This is what `Layer::all_counts` returns.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct LayerCounts {
    /// The amount of nodes.
    pub node_count: usize,
    /// The amount of predicates.
    pub predicate_count: usize,
    /// The amount of values.
    pub value_count: usize,
}
173
/// A triple whose subject, predicate and object are numerical ids.
///
/// Triples compare by subject first, then predicate, then object.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub struct IdTriple {
    /// The id of the subject.
    pub subject: u64,
    /// The id of the predicate.
    pub predicate: u64,
    /// The id of the object.
    pub object: u64,
}
181
182impl IdTriple {
183    /// Construct a new id triple.
184    pub fn new(subject: u64, predicate: u64, object: u64) -> Self {
185        IdTriple {
186            subject,
187            predicate,
188            object,
189        }
190    }
191
192    /// convert this triple into a `PartiallyResolvedTriple`, which is a data structure used in layer building.
193    pub fn to_resolved(&self) -> PartiallyResolvedTriple {
194        PartiallyResolvedTriple {
195            subject: PossiblyResolved::Resolved(self.subject),
196            predicate: PossiblyResolved::Resolved(self.predicate),
197            object: PossiblyResolved::Resolved(self.object),
198        }
199    }
200}
201
202/// A triple stored as strings.
203#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
204pub struct ValueTriple {
205    pub subject: String,
206    pub predicate: String,
207    pub object: ObjectType,
208}
209
210impl ValueTriple {
211    /// Construct a triple with a node object.
212    ///
213    /// Nodes may appear in both the subject and object position.
214    pub fn new_node(subject: &str, predicate: &str, object: &str) -> ValueTriple {
215        ValueTriple {
216            subject: subject.to_owned(),
217            predicate: predicate.to_owned(),
218            object: ObjectType::Node(object.to_owned()),
219        }
220    }
221
222    /// Construct a triple with a value object.
223    ///
224    /// Values may only appear in the object position.
225    pub fn new_value(subject: &str, predicate: &str, object: TypedDictEntry) -> ValueTriple {
226        ValueTriple {
227            subject: subject.to_owned(),
228            predicate: predicate.to_owned(),
229            object: ObjectType::Value(object),
230        }
231    }
232
233    /// Construct a triple with a string value object.
234    ///
235    /// Values may only appear in the object position.
236    pub fn new_string_value(subject: &str, predicate: &str, object: &str) -> ValueTriple {
237        ValueTriple {
238            subject: subject.to_owned(),
239            predicate: predicate.to_owned(),
240            object: ObjectType::Value(String::make_entry(&object)),
241        }
242    }
243
244    /// Convert this triple to a `PartiallyResolvedTriple`, marking each field as unresolved.
245    pub fn to_unresolved(self) -> PartiallyResolvedTriple {
246        PartiallyResolvedTriple {
247            subject: PossiblyResolved::Unresolved(self.subject),
248            predicate: PossiblyResolved::Unresolved(self.predicate),
249            object: PossiblyResolved::Unresolved(self.object),
250        }
251    }
252}
253
/// A value that is either still in its unresolved form, or has been
/// resolved to a numerical id.
#[derive(Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub enum PossiblyResolved<T: Clone + PartialEq + Eq + PartialOrd + Ord + Hash> {
    /// The inner value, not yet resolved to an id.
    Unresolved(T),
    /// The id the value was resolved to.
    Resolved(u64),
}

impl<T> PossiblyResolved<T>
where
    T: Clone + PartialEq + Eq + PartialOrd + Ord + Hash,
{
    /// Returns true for a resolved id, and false for an unresolved value.
    pub fn is_resolved(&self) -> bool {
        matches!(self, Self::Resolved(_))
    }

    /// Borrow the contents: an unresolved value becomes a reference to
    /// that value, a resolved id is copied.
    pub fn as_ref(&self) -> PossiblyResolved<&T> {
        match *self {
            Self::Resolved(id) => PossiblyResolved::Resolved(id),
            Self::Unresolved(ref inner) => PossiblyResolved::Unresolved(inner),
        }
    }

    /// Take out the unresolved inner value.
    ///
    /// # Panics
    ///
    /// Panics if this is actually a resolved id.
    pub fn unwrap_unresolved(self) -> T {
        if let Self::Unresolved(inner) = self {
            inner
        } else {
            panic!("tried to unwrap unresolved, but got a resolved")
        }
    }

    /// Take out the resolved id.
    ///
    /// # Panics
    ///
    /// Panics if this is actually an unresolved value.
    pub fn unwrap_resolved(self) -> u64 {
        if let Self::Resolved(id) = self {
            id
        } else {
            panic!("tried to unwrap resolved, but got an unresolved")
        }
    }
}
294
295/// A triple where the subject, predicate and object can all either be fully resolved to an id, or unresolved.
296#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
297pub struct PartiallyResolvedTriple {
298    pub subject: PossiblyResolved<String>,
299    pub predicate: PossiblyResolved<String>,
300    pub object: PossiblyResolved<ObjectType>,
301}
302
303impl PartiallyResolvedTriple {
304    /// Resolve the unresolved ids in this triple using the given hashmaps for nodes, predicates and values.
305    pub fn resolve_with(
306        &self,
307        node_map: &HashMap<String, u64>,
308        predicate_map: &HashMap<String, u64>,
309        value_map: &HashMap<TypedDictEntry, u64>,
310    ) -> Option<IdTriple> {
311        let subject = match self.subject.as_ref() {
312            PossiblyResolved::Unresolved(s) => *node_map.get(s)?,
313            PossiblyResolved::Resolved(id) => id,
314        };
315        let predicate = match self.predicate.as_ref() {
316            PossiblyResolved::Unresolved(p) => *predicate_map.get(p)?,
317            PossiblyResolved::Resolved(id) => id,
318        };
319        let object = match self.object.as_ref() {
320            PossiblyResolved::Unresolved(ObjectType::Node(n)) => *node_map.get(n)?,
321            PossiblyResolved::Unresolved(ObjectType::Value(v)) => *value_map.get(v)?,
322            PossiblyResolved::Resolved(id) => id,
323        };
324
325        Some(IdTriple {
326            subject,
327            predicate,
328            object,
329        })
330    }
331
332    pub fn as_resolved(&self) -> Option<IdTriple> {
333        if !self.subject.is_resolved()
334            || !self.predicate.is_resolved()
335            || !self.object.is_resolved()
336        {
337            None
338        } else {
339            Some(IdTriple::new(
340                self.subject.as_ref().unwrap_resolved(),
341                self.predicate.as_ref().unwrap_resolved(),
342                self.object.as_ref().unwrap_resolved(),
343            ))
344        }
345    }
346
347    pub fn make_resolved_or_zero(&mut self) {
348        if !self.subject.is_resolved()
349            || !self.predicate.is_resolved()
350            || !self.object.is_resolved()
351        {
352            self.subject = PossiblyResolved::Resolved(0);
353            self.predicate = PossiblyResolved::Resolved(0);
354            self.object = PossiblyResolved::Resolved(0);
355        }
356    }
357}
358
359/// The type of an object in a triple.
360///
361/// Objects in a triple may either be a node or a value. Nodes can be
362/// used both in the subject and the object position, while values are
363/// only used in the object position.
364///
365/// Terminus-store keeps track of whether an object was stored as a
366/// node or a value, and will return this information in queries. It
367/// is possible to have the same string appear both as a node and a
368/// value, without this leading to conflicts.
369#[derive(Debug, Clone, PartialOrd, PartialEq, Eq, Ord, Hash)]
370pub enum ObjectType {
371    Node(String),
372    Value(TypedDictEntry),
373}
374
375impl ObjectType {
376    pub fn node(self) -> Option<String> {
377        match self {
378            ObjectType::Node(s) => Some(s),
379            ObjectType::Value(_) => None,
380        }
381    }
382
383    pub fn node_ref(&self) -> Option<&str> {
384        match self {
385            ObjectType::Node(s) => Some(s),
386            ObjectType::Value(_) => None,
387        }
388    }
389
390    pub fn value(self) -> Option<TypedDictEntry> {
391        match self {
392            ObjectType::Node(_) => None,
393            ObjectType::Value(v) => Some(v),
394        }
395    }
396
397    pub fn value_ref(&self) -> Option<&TypedDictEntry> {
398        match self {
399            ObjectType::Node(_) => None,
400            ObjectType::Value(v) => Some(v),
401        }
402    }
403}
404
#[cfg(test)]
mod tests {
    use super::*;
    use crate::layer::internal::base::base_tests::base_layer_files;
    use crate::layer::internal::base::BaseLayer;
    use crate::layer::internal::child::child_tests::child_layer_files;
    use crate::layer::internal::child::ChildLayer;
    use crate::layer::internal::InternalLayer;
    use crate::layer::simple_builder::{LayerBuilder, SimpleLayerBuilder};
    use std::sync::Arc;

    /// With two triples sharing subject and predicate in the base layer,
    /// removing one of them in a child layer must leave only the other.
    #[tokio::test]
    async fn find_triple_after_adjacent_removal() {
        let base_files = base_layer_files();
        let mut base_builder = SimpleLayerBuilder::new([1, 2, 3, 4, 5], base_files.clone());

        base_builder.add_value_triple(ValueTriple::new_string_value("cow", "says", "moo"));
        base_builder.add_value_triple(ValueTriple::new_string_value("cow", "says", "sniff"));

        base_builder.commit().await.unwrap();

        let loaded_base = BaseLayer::load_from_files([1, 2, 3, 4, 5], &base_files)
            .await
            .unwrap();
        let base: Arc<InternalLayer> = Arc::new(loaded_base.into());

        let child_files = child_layer_files();
        let mut child_builder =
            SimpleLayerBuilder::from_parent([5, 4, 3, 2, 1], base.clone(), child_files.clone());
        child_builder.remove_value_triple(ValueTriple::new_string_value("cow", "says", "moo"));
        child_builder.commit().await.unwrap();

        let loaded_child = ChildLayer::load_from_files([5, 4, 3, 2, 1], base.clone(), &child_files)
            .await
            .unwrap();
        let child: Arc<InternalLayer> = Arc::new(loaded_child.into());

        // TODO why are we not using these results?
        // The three collections below are never compared with anything.
        // They still exercise the conversions: each `unwrap` panics when a
        // triple cannot be converted back to its value form.
        let _base_triples_additions: Vec<_> = base
            .internal_triple_additions()
            .map(|id_triple| child.id_triple_to_string(&id_triple).unwrap())
            .collect();

        let _triples_additions: Vec<_> = child
            .internal_triple_additions()
            .map(|id_triple| child.id_triple_to_string(&id_triple).unwrap())
            .collect();

        let _triples_removals: Vec<_> = child
            .internal_triple_removals()
            .map(|id_triple| child.id_triple_to_string(&id_triple).unwrap())
            .collect();

        let remaining: Vec<_> = child
            .triples()
            .map(|id_triple| child.id_triple_to_string(&id_triple).unwrap())
            .collect();

        assert_eq!(
            vec![ValueTriple::new_string_value("cow", "says", "sniff")],
            remaining
        );
    }

    /// A triple that is removed in one child layer and added again in the
    /// next child layer must be visible through the topmost layer.
    #[tokio::test]
    async fn find_triple_after_removal_and_readdition() {
        let base_files = base_layer_files();
        let mut base_builder = SimpleLayerBuilder::new([1, 2, 3, 4, 5], base_files.clone());

        base_builder.add_value_triple(ValueTriple::new_string_value("cow", "says", "moo"));

        base_builder.commit().await.unwrap();

        let loaded_base = BaseLayer::load_from_files([1, 2, 3, 4, 5], &base_files)
            .await
            .unwrap();
        let base: Arc<InternalLayer> = Arc::new(loaded_base.into());

        // First child: take the triple away again.
        let removal_files = child_layer_files();
        let mut removal_builder =
            SimpleLayerBuilder::from_parent([5, 4, 3, 2, 1], base.clone(), removal_files.clone());
        removal_builder.remove_value_triple(ValueTriple::new_string_value("cow", "says", "moo"));
        removal_builder.commit().await.unwrap();

        let loaded_removal = ChildLayer::load_from_files([5, 4, 3, 2, 1], base, &removal_files)
            .await
            .unwrap();
        let removal_layer: Arc<InternalLayer> = Arc::new(loaded_removal.into());

        // Second child: put the same triple back.
        let readdition_files = child_layer_files();
        let mut readdition_builder = SimpleLayerBuilder::from_parent(
            [5, 4, 3, 2, 2],
            removal_layer.clone(),
            readdition_files.clone(),
        );
        readdition_builder.add_value_triple(ValueTriple::new_string_value("cow", "says", "moo"));
        readdition_builder.commit().await.unwrap();

        let loaded_readdition =
            ChildLayer::load_from_files([5, 4, 3, 2, 2], removal_layer, &readdition_files)
                .await
                .unwrap();
        let top: Arc<InternalLayer> = Arc::new(loaded_readdition.into());

        let found: Vec<_> = top
            .triples()
            .map(|id_triple| top.id_triple_to_string(&id_triple).unwrap())
            .collect();

        assert_eq!(
            vec![ValueTriple::new_string_value("cow", "says", "moo")],
            found
        );
    }

    /// `single_triple_sp` must find the triple added in the topmost layer,
    /// and for a subject with one of its two triples removed, the triple
    /// that was not removed.
    #[tokio::test]
    async fn find_single_triple_sp() {
        let base_files = base_layer_files();
        let mut base_builder = SimpleLayerBuilder::new([1, 2, 3, 4, 5], base_files.clone());

        base_builder.add_value_triple(ValueTriple::new_string_value("duck", "says", "quack"));
        base_builder.add_value_triple(ValueTriple::new_string_value("duck", "says", "neigh"));

        base_builder.commit().await.unwrap();

        let loaded_base = BaseLayer::load_from_files([1, 2, 3, 4, 5], &base_files)
            .await
            .unwrap();
        let base: Arc<InternalLayer> = Arc::new(loaded_base.into());

        // First child: remove one of the two duck triples.
        let removal_files = child_layer_files();
        let mut removal_builder =
            SimpleLayerBuilder::from_parent([5, 4, 3, 2, 1], base.clone(), removal_files.clone());
        removal_builder.remove_value_triple(ValueTriple::new_string_value("duck", "says", "neigh"));
        removal_builder.commit().await.unwrap();

        let loaded_removal = ChildLayer::load_from_files([5, 4, 3, 2, 1], base, &removal_files)
            .await
            .unwrap();
        let removal_layer: Arc<InternalLayer> = Arc::new(loaded_removal.into());

        // Second child: add a triple for another subject.
        let addition_files = child_layer_files();
        let mut addition_builder = SimpleLayerBuilder::from_parent(
            [5, 4, 3, 2, 2],
            removal_layer.clone(),
            addition_files.clone(),
        );
        addition_builder.add_value_triple(ValueTriple::new_string_value("cow", "says", "moo"));
        addition_builder.commit().await.unwrap();

        let loaded_addition =
            ChildLayer::load_from_files([5, 4, 3, 2, 2], removal_layer, &addition_files)
                .await
                .unwrap();
        let top: Arc<InternalLayer> = Arc::new(loaded_addition.into());

        let cow_id = top.subject_id("cow").unwrap();
        let cow_says_id = top.predicate_id("says").unwrap();
        let cow_id_triple = top.single_triple_sp(cow_id, cow_says_id).unwrap();
        let cow_triple = top.id_triple_to_string(&cow_id_triple).unwrap();

        let duck_id = top.subject_id("duck").unwrap();
        let duck_says_id = top.predicate_id("says").unwrap();
        let duck_id_triple = top.single_triple_sp(duck_id, duck_says_id).unwrap();
        let duck_triple = top.id_triple_to_string(&duck_id_triple).unwrap();

        assert_eq!(
            ValueTriple::new_string_value("cow", "says", "moo"),
            cow_triple
        );
        assert_eq!(
            ValueTriple::new_string_value("duck", "says", "quack"),
            duck_triple
        );
    }

    /// Values that are not strings must come back out of a layer with
    /// their original type and content.
    #[tokio::test]
    async fn find_nonstring_triples() {
        let base_files = base_layer_files();
        let mut base_builder = SimpleLayerBuilder::new([1, 2, 3, 4, 5], base_files.clone());

        // Numeric values.
        let duck_feet = ValueTriple::new_value("duck", "num_feet", u32::make_entry(&2));
        base_builder.add_value_triple(duck_feet);
        let cow_feet = ValueTriple::new_value("cow", "num_feet", u32::make_entry(&4));
        base_builder.add_value_triple(cow_feet);
        let disabled_cow_feet =
            ValueTriple::new_value("disabled_cow", "num_feet", u32::make_entry(&3));
        base_builder.add_value_triple(disabled_cow_feet);

        // String values.
        let duck_swims = ValueTriple::new_value("duck", "swims", String::make_entry(&"true"));
        base_builder.add_value_triple(duck_swims);
        let cow_swims = ValueTriple::new_value("cow", "swims", String::make_entry(&"false"));
        base_builder.add_value_triple(cow_swims);
        let disabled_cow_swims =
            ValueTriple::new_value("disabled_cow", "swims", String::make_entry(&"false"));
        base_builder.add_value_triple(disabled_cow_swims);

        base_builder.commit().await.unwrap();

        let loaded_base = BaseLayer::load_from_files([1, 2, 3, 4, 5], &base_files)
            .await
            .unwrap();
        let base: Arc<InternalLayer> = Arc::new(loaded_base.into());

        let num_feet_id = base.predicate_id("num_feet").unwrap();
        let mut feet_by_subject: Vec<_> = base
            .triples_p(num_feet_id)
            .map(|id_triple| {
                let subject = base.id_subject(id_triple.subject).unwrap();
                let feet = base
                    .id_object_value(id_triple.object)
                    .unwrap()
                    .as_val::<u32, u32>();
                (subject, feet)
            })
            .collect();
        // Sort so that the comparison does not depend on iteration order.
        feet_by_subject.sort();

        let expected = vec![
            ("cow".to_owned(), 4),
            ("disabled_cow".to_owned(), 3),
            ("duck".to_owned(), 2),
        ];

        assert_eq!(expected, feet_by_subject);
    }
}