rdf_fusion_encoding/object_id/
mapping.rs

1use crate::object_id::{ObjectIdArray, ObjectIdEncoding, ObjectIdScalar};
2use crate::plain_term::{PlainTermArray, PlainTermScalar};
3use crate::typed_value::{TypedValueArray, TypedValueScalar};
4use crate::{EncodingArray, EncodingScalar};
5use datafusion::arrow::error::ArrowError;
6use datafusion::error::DataFusionError;
7use rdf_fusion_model::{CorruptionError, StorageError};
8use std::error::Error;
9use std::fmt::Debug;
10use thiserror::Error;
11
12/// Indicates an error that occurred while working with the [ObjectIdMapping].
13#[derive(Error, Debug)]
14pub enum ObjectIdMappingError {
15    #[error("An error occurred while encoding the result. {0}")]
16    ArrowError(ArrowError),
17    #[error("A literal was encountered at a position where a graph name is expected.")]
18    LiteralAsGraphName,
19    #[error("An unknown object ID was encountered in an unexpected place.")]
20    UnknownObjectId,
21    #[error("An error occurred while accessing the object id storage.")]
22    Storage(Box<dyn Error + Sync + Send>),
23}
24
/// Stand-alone error signalling that an unknown object ID was encountered in an unexpected
/// place.
///
/// Carries no data; its message is identical to the one of
/// [ObjectIdMappingError::UnknownObjectId].
#[derive(Error, Debug)]
#[error("An unknown object ID was encountered in an unexpected place.")]
pub struct UnknownObjectIdError;
28
29impl From<ArrowError> for ObjectIdMappingError {
30    fn from(value: ArrowError) -> Self {
31        ObjectIdMappingError::ArrowError(value)
32    }
33}
34
35impl From<ObjectIdMappingError> for DataFusionError {
36    fn from(value: ObjectIdMappingError) -> Self {
37        DataFusionError::External(Box::new(value))
38    }
39}
40
41impl From<ObjectIdMappingError> for StorageError {
42    fn from(value: ObjectIdMappingError) -> Self {
43        StorageError::Corruption(CorruptionError::new(value))
44    }
45}
46
47/// The object id mapping is responsible for mapping between object ids and RDF terms in the
48/// [ObjectIdEncoding].
49///
50/// The mapping between the object id and the RDF term is bijective. In other words, each distinct
51/// RDF term maps to exactly one object id, while each object id maps to exactly one RDF term. As
52/// a result, operations that rely on the equality of RDF terms (`SAME_TERM`) can directly work
53/// with the object ids. Joining solution sets is the most important example.
54///
55/// # Typed Values
56///
57/// To speed up decoding object ids directly into the [TypedValueEncoding](crate::typed_value::TypedValueEncoding),
58/// the trait also contains methods for directly mapping object ids to their typed values. This can
59/// be implemented in two ways:
60/// 1. Decode the object id to a plain term and then translate the term to a typed value
61/// 2. Maintain a second mapping from the object ids to the typed value of their associated RDF term
62///
63/// Contrary to the mapping between RDF terms and object ids, the mapping between typed values and
64/// object ids is not bijective. A single typed value can map to multiple object ids. For example,
65/// this is the case for the two RDF terms `"01"^^xsd:integer` and `"1"^^xsd:integer`.
66pub trait ObjectIdMapping: Debug + Send + Sync {
67    /// Returns the encoding.
68    fn encoding(&self) -> ObjectIdEncoding;
69
70    /// Try to retrieve the object id of the given `scalar`.
71    ///
72    /// This method *does not* automatically create a mapping. See [Self::encode_scalar] for this
73    /// functionality.
74    fn try_get_object_id(
75        &self,
76        scalar: &PlainTermScalar,
77    ) -> Result<Option<ObjectIdScalar>, ObjectIdMappingError>;
78
79    /// Encodings the entire `array` as an [ObjectIdArray]. Automatically creates a mapping for a
80    /// fresh object id if a term is not yet mapped.
81    fn encode_array(
82        &self,
83        array: &PlainTermArray,
84    ) -> Result<ObjectIdArray, ObjectIdMappingError>;
85
86    /// Encodes a single `scalar` as an [ObjectIdScalar]. Automatically creates a mapping for a
87    /// fresh object id if the term is not yet mapped.
88    fn encode_scalar(
89        &self,
90        scalar: &PlainTermScalar,
91    ) -> Result<ObjectIdScalar, ObjectIdMappingError> {
92        let array = scalar
93            .to_array(1)
94            .expect("Data type is supported for to_array");
95        let encoded = self.encode_array(&array)?;
96        Ok(encoded.try_as_scalar(0).expect("Row 0 always exists"))
97    }
98
99    /// Decodes the entire `array` as a [PlainTermArray].
100    fn decode_array(
101        &self,
102        array: &ObjectIdArray,
103    ) -> Result<PlainTermArray, ObjectIdMappingError>;
104
105    /// Decodes the entire `array` as a [TypedValueArray].
106    fn decode_array_to_typed_value(
107        &self,
108        array: &ObjectIdArray,
109    ) -> Result<TypedValueArray, ObjectIdMappingError>;
110
111    /// Decodes a single `scalar` as a [PlainTermScalar].
112    fn decode_scalar(
113        &self,
114        scalar: &ObjectIdScalar,
115    ) -> Result<PlainTermScalar, ObjectIdMappingError> {
116        let array = scalar
117            .to_array(1)
118            .expect("Data type is supported for to_array");
119        let encoded = self.decode_array(&array)?;
120        Ok(encoded.try_as_scalar(0).expect("Row 0 always exists"))
121    }
122
123    /// Decodes a single `scalar` as a [TypedValueScalar].
124    fn decode_scalar_to_typed_value(
125        &self,
126        scalar: &ObjectIdScalar,
127    ) -> Result<TypedValueScalar, ObjectIdMappingError> {
128        let array = scalar
129            .to_array(1)
130            .expect("Data type is supported for to_array");
131        let decoded = self.decode_array_to_typed_value(&array)?;
132        Ok(decoded.try_as_scalar(0).expect("Row 0 always exists"))
133    }
134}