rdf_fusion_encoding/
quad_storage_encoding.rs

1use crate::TermEncoding;
2use crate::object_id::ObjectIdEncoding;
3use crate::plain_term::{PLAIN_TERM_ENCODING, PlainTermEncoding};
4use datafusion::arrow::datatypes::{DataType, Field, Fields, Schema, SchemaRef};
5use datafusion::common::{DFSchema, DFSchemaRef};
6use rdf_fusion_model::quads::{COL_GRAPH, COL_OBJECT, COL_PREDICATE, COL_SUBJECT};
7use std::collections::HashMap;
8use std::sync::{Arc, LazyLock};
9
10/// Defines which encoding is used for retrieving quads from the storage.
11///
12/// Defining this is necessary such that the query planner knows what type should be assigned to the
13/// schema of quad pattern logical nodes.
14#[derive(Debug, Clone, PartialEq, Eq, Hash)]
15pub enum QuadStorageEncoding {
16    /// Uses the plain term encoding.
17    ///
18    /// Currently, the plain term encoding is not parameterizable. Therefore, this variant has no
19    /// further information.
20    PlainTerm,
21    /// Uses the provided object id encoding.
22    ObjectId(ObjectIdEncoding),
23}
24
25static PLAIN_TERM_QUAD_SCHEMA: LazyLock<SchemaRef> = LazyLock::new(|| {
26    SchemaRef::new(Schema::new(vec![
27        Field::new(COL_GRAPH, PlainTermEncoding::data_type(), true),
28        Field::new(COL_SUBJECT, PlainTermEncoding::data_type(), false),
29        Field::new(COL_PREDICATE, PlainTermEncoding::data_type(), false),
30        Field::new(COL_OBJECT, PlainTermEncoding::data_type(), false),
31    ]))
32});
33
34static PLAIN_TERM_QUAD_DFSCHEMA: LazyLock<DFSchemaRef> = LazyLock::new(|| {
35    DFSchemaRef::new(DFSchema::try_from(PLAIN_TERM_QUAD_SCHEMA.clone()).unwrap())
36});
37
38impl QuadStorageEncoding {
39    /// Returns the data type of a single term column, given the current encoding.
40    pub fn term_type(&self) -> DataType {
41        match self {
42            QuadStorageEncoding::PlainTerm => PLAIN_TERM_ENCODING.data_type(),
43            QuadStorageEncoding::ObjectId(enc) => enc.data_type(),
44        }
45    }
46
47    /// Returns the schema of an entire quad, given the current encoding.
48    pub fn quad_schema(&self) -> DFSchemaRef {
49        match self {
50            QuadStorageEncoding::PlainTerm => PLAIN_TERM_QUAD_DFSCHEMA.clone(),
51            QuadStorageEncoding::ObjectId(encoding) => object_id_quad_schema(encoding),
52        }
53    }
54
55    /// Returns an optional reference to the contained [ObjectIdEncoding].
56    ///
57    /// Returns [None] otherwise.
58    pub fn object_id_encoding(&self) -> Option<&ObjectIdEncoding> {
59        match &self {
60            QuadStorageEncoding::ObjectId(encoding) => Some(encoding),
61            QuadStorageEncoding::PlainTerm => None,
62        }
63    }
64}
65
66/// Computes the quad schema based on the given [ObjectIdEncoding].
67fn object_id_quad_schema(encoding: &ObjectIdEncoding) -> DFSchemaRef {
68    let data_type = encoding.data_type();
69    Arc::new(
70        DFSchema::from_unqualified_fields(
71            Fields::from(vec![
72                Field::new(COL_GRAPH, data_type.clone(), true),
73                Field::new(COL_SUBJECT, data_type.clone(), false),
74                Field::new(COL_PREDICATE, data_type.clone(), false),
75                Field::new(COL_OBJECT, data_type, false),
76            ]),
77            HashMap::new(),
78        )
79        .expect("Fields are fixed"),
80    )
81}