rdf_fusion_encoding/plain_term/
encoding.rs

1use crate::encoding::TermEncoding;
2use crate::plain_term::encoders::DefaultPlainTermEncoder;
3use crate::plain_term::{PlainTermArray, PlainTermScalar};
4use crate::{EncodingName, TermEncoder};
5use datafusion::arrow::array::ArrayRef;
6use datafusion::arrow::datatypes::{DataType, Field, Fields};
7use datafusion::common::ScalarValue;
8use rdf_fusion_model::DFResult;
9use rdf_fusion_model::{TermRef, ThinResult};
10use std::clone::Clone;
11use std::fmt::Display;
12use std::sync::LazyLock;
13use thiserror::Error;
14
/// Enumerates the fields that make up the [PlainTermEncoding].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PlainTermEncodingField {
    /// The type of the RDF term.
    TermType,
    /// The lexical value of the RDF term.
    Value,
    /// The data type of an RDF literal. This covers simple literals and language-tagged
    /// literals as well. A term that carries a language tag must have rdf:langString as its
    /// data type.
    ///
    /// This field should be `null` for named nodes and blank nodes.
    DataType,
    /// The optional language tag of a language-tagged literal.
    ///
    /// This field should be `null` for named nodes, blank nodes, and literals without a
    /// language tag.
    LanguageTag,
}
33
34impl PlainTermEncodingField {
35    pub fn name(self) -> &'static str {
36        match self {
37            PlainTermEncodingField::TermType => "term_type",
38            PlainTermEncodingField::Value => "value",
39            PlainTermEncodingField::DataType => "data_type",
40            PlainTermEncodingField::LanguageTag => "language_tag",
41        }
42    }
43
44    pub fn index(self) -> usize {
45        match self {
46            PlainTermEncodingField::TermType => 0,
47            PlainTermEncodingField::Value => 1,
48            PlainTermEncodingField::DataType => 2,
49            PlainTermEncodingField::LanguageTag => 3,
50        }
51    }
52
53    #[allow(clippy::match_same_arms)]
54    pub fn data_type(self) -> DataType {
55        match self {
56            PlainTermEncodingField::TermType => DataType::UInt8,
57            PlainTermEncodingField::Value => DataType::Utf8,
58            PlainTermEncodingField::DataType => DataType::Utf8,
59            PlainTermEncodingField::LanguageTag => DataType::Utf8,
60        }
61    }
62
63    #[allow(clippy::match_same_arms)]
64    pub fn is_nullable(self) -> bool {
65        match self {
66            PlainTermEncodingField::TermType => false,
67            PlainTermEncodingField::Value => false,
68            PlainTermEncodingField::DataType => true,
69            PlainTermEncodingField::LanguageTag => true,
70        }
71    }
72
73    pub fn field(self) -> Field {
74        Field::new(self.name(), self.data_type(), self.is_nullable())
75    }
76}
77
78static FIELDS_TYPE: LazyLock<Fields> = LazyLock::new(|| {
79    let fields = vec![
80        PlainTermEncodingField::TermType.field(),
81        PlainTermEncodingField::Value.field(),
82        PlainTermEncodingField::DataType.field(),
83        PlainTermEncodingField::LanguageTag.field(),
84    ];
85    Fields::from(fields)
86});
87
/// The type of an RDF term that is encoded in the [PlainTermEncoding].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum PlainTermType {
    /// The term is a named node.
    NamedNode,
    /// The term is a blank node.
    BlankNode,
    /// The term is a literal.
    Literal,
}
98
99#[derive(Debug, Clone, Copy, Default, Error, PartialEq, Eq, Hash)]
100pub struct UnknownPlainTermTypeError;
101
102impl Display for UnknownPlainTermTypeError {
103    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
104        write!(f, "Unexpected type_id for encoded RDF Term")
105    }
106}
107
108impl TryFrom<u8> for PlainTermType {
109    type Error = UnknownPlainTermTypeError;
110
111    fn try_from(value: u8) -> Result<Self, Self::Error> {
112        match value {
113            0 => Ok(PlainTermType::NamedNode),
114            1 => Ok(PlainTermType::BlankNode),
115            2 => Ok(PlainTermType::Literal),
116            _ => Err(UnknownPlainTermTypeError),
117        }
118    }
119}
120
121impl From<PlainTermType> for u8 {
122    fn from(val: PlainTermType) -> Self {
123        match val {
124            PlainTermType::NamedNode => 0,
125            PlainTermType::BlankNode => 1,
126            PlainTermType::Literal => 2,
127        }
128    }
129}
130
131/// The instance of the [PlainTermEncoding].
132///
133/// As there is currently no way to parameterize the encoding, accessing it via this constant is
134/// the preferred way.
135pub const PLAIN_TERM_ENCODING: PlainTermEncoding = PlainTermEncoding;
136
/// The plain term encoding.
///
/// This is a unit struct: it holds no data of its own.
#[derive(Debug)]
pub struct PlainTermEncoding;
139
140impl PlainTermEncoding {
141    /// Returns the Arrow [Fields] of the [PlainTermEncoding].
142    pub(crate) fn fields() -> Fields {
143        FIELDS_TYPE.clone()
144    }
145
146    /// Returns the type of the [PlainTermEncoding].
147    ///
148    /// The type of the [PlainTermEncoding] is statically known and cannot be configured.
149    pub fn data_type() -> DataType {
150        DataType::Struct(Self::fields().clone())
151    }
152
153    /// Encodes the `term` as a [PlainTermScalar].
154    pub fn encode_term(
155        &self,
156        term: ThinResult<TermRef<'_>>,
157    ) -> DFResult<PlainTermScalar> {
158        DefaultPlainTermEncoder::encode_term(term)
159    }
160}
161
162impl TermEncoding for PlainTermEncoding {
163    type Array = PlainTermArray;
164    type Scalar = PlainTermScalar;
165
166    fn name(&self) -> EncodingName {
167        EncodingName::PlainTerm
168    }
169
170    fn data_type(&self) -> DataType {
171        PlainTermEncoding::data_type()
172    }
173
174    fn try_new_array(&self, array: ArrayRef) -> DFResult<Self::Array> {
175        array.try_into()
176    }
177
178    fn try_new_scalar(&self, scalar: ScalarValue) -> DFResult<Self::Scalar> {
179        scalar.try_into()
180    }
181}
182
#[cfg(test)]
mod tests {
    use super::*;

    /// Every term type must survive the conversion to `u8` and back unchanged.
    #[test]
    fn test_plain_term_type_roundtrip() {
        let all_types = [
            PlainTermType::NamedNode,
            PlainTermType::BlankNode,
            PlainTermType::Literal,
        ];
        for term_type in all_types {
            test_roundtrip(term_type);
        }
    }

    /// Ids that do not belong to any term type must be rejected instead of being mapped to
    /// one of the known types.
    #[test]
    fn test_plain_term_type_rejects_unknown_ids() {
        for unknown_id in [3u8, u8::MAX] {
            assert_eq!(
                PlainTermType::try_from(unknown_id),
                Err(UnknownPlainTermTypeError)
            );
        }
    }

    /// Converts `term_field` to its numeric id and checks that decoding the id yields the
    /// original value again.
    fn test_roundtrip(term_field: PlainTermType) {
        let value: u8 = term_field.into();
        assert_eq!(Ok(term_field), PlainTermType::try_from(value));
    }
}