use crate::model::{
BlankNode, BlankNodeRef, Literal, LiteralRef, NamedNode, NamedNodeRef, Term, TermRef,
};
use serde::{Deserialize, Serialize};
use std::collections::hash_map::DefaultHasher;
use std::fmt::{Debug, Display};
use std::hash::{Hash, Hasher};
/// A 16-byte hash standing in for a string.
///
/// Instances are created by hashing text with `StrHash::new` or by wrapping raw bytes
/// with `StrHash::from_be_bytes`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub struct StrHash {
    /// The raw hash bytes.
    hash: [u8; 16],
}
impl StrHash {
    /// Hashes `value` into 16 bytes.
    ///
    /// Bytes 0..8 are the big-endian `DefaultHasher` digest of the string's bytes.
    /// Bytes 8..16 are the digest of the same bytes preceded by the fixed prefix
    /// `DE AD BE EF`, produced by a second, independent hasher.
    ///
    /// NOTE(review): the standard library does not specify `DefaultHasher`'s algorithm
    /// and may change it between releases, so these values are only reproducible for a
    /// given toolchain — confirm this is acceptable wherever hashes are persisted.
    pub fn new(value: &str) -> Self {
        let bytes = value.as_bytes();

        let mut plain = DefaultHasher::new();
        plain.write(bytes);

        let mut salted = DefaultHasher::new();
        salted.write(&[0xDE, 0xAD, 0xBE, 0xEF]);
        salted.write(bytes);

        let mut hash = [0u8; 16];
        let (front, back) = hash.split_at_mut(8);
        front.copy_from_slice(&plain.finish().to_be_bytes());
        back.copy_from_slice(&salted.finish().to_be_bytes());
        Self { hash }
    }

    /// Wraps 16 raw bytes as a hash without any transformation.
    #[inline]
    pub fn from_be_bytes(hash: [u8; 16]) -> Self {
        Self { hash }
    }

    /// Returns the 16 raw hash bytes.
    #[inline]
    pub fn to_be_bytes(self) -> [u8; 16] {
        self.hash
    }
}
impl Hash for StrHash {
    /// Feeds the first eight hash bytes to `state` as a single big-endian `u64`.
    ///
    /// The last eight bytes are not hashed. Equal values still hash equally, because
    /// equality compares all sixteen bytes.
    #[inline]
    fn hash<H: Hasher>(&self, state: &mut H) {
        let mut prefix = [0u8; 8];
        prefix.copy_from_slice(&self.hash[..8]);
        state.write_u64(u64::from_be_bytes(prefix));
    }
}
impl Display for StrHash {
    /// Writes `StrHash(<hex>)`, where `<hex>` is `hex::encode` applied to the 16 bytes.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let digits = hex::encode(self.hash);
        write!(f, "StrHash({})", digits)
    }
}
/// A short string stored inline in a fixed 16-byte buffer.
///
/// `SmallString::new` accepts strings of at most 15 bytes and zero-fills the unused
/// tail of the buffer.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct SmallString {
    /// String bytes, followed by zero padding.
    data: [u8; 16],
    /// Number of leading bytes of `data` that belong to the string.
    len: u8,
}
impl SmallString {
const MAX_INLINE_LEN: usize = 15;
pub fn new(s: &str) -> Option<Self> {
if s.len() > Self::MAX_INLINE_LEN {
return None;
}
let mut data = [0u8; 16];
data[..s.len()].copy_from_slice(s.as_bytes());
Some(SmallString {
data,
len: s.len() as u8,
})
}
pub fn as_str(&self) -> &str {
unsafe { std::str::from_utf8_unchecked(&self.data[..self.len as usize]) }
}
pub fn len(&self) -> usize {
self.len as usize
}
pub fn is_empty(&self) -> bool {
self.len == 0
}
}
impl Display for SmallString {
    /// Writes the stored text verbatim; width and padding flags are not applied.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(self.as_str())
    }
}
impl From<&str> for SmallString {
    /// Converts `s` with [`SmallString::new`].
    ///
    /// # Panics
    ///
    /// Panics with `String too long for SmallString` when `SmallString::new` returns
    /// `None`, i.e. when `s` does not fit inline.
    fn from(s: &str) -> Self {
        match Self::new(s) {
            Some(small) => small,
            None => panic!("String too long for SmallString"),
        }
    }
}
/// Compact encoding of an RDF term.
///
/// In variant names, `Small` marks a string kept inline as a [`SmallString`] and `Big`
/// marks a string represented by its [`StrHash`]. For two-part literals the first word
/// describes the lexical value and the second the language tag or datatype.
#[derive(Debug, Clone, Hash, PartialEq, Eq, Serialize, Deserialize)]
pub enum EncodedTerm {
    /// The default graph; carries no payload.
    DefaultGraph,
    /// A named node, identified by the hash of its IRI.
    NamedNode { iri_id: StrHash },
    /// A blank node whose identifier hex-decodes to exactly 16 bytes.
    NumericalBlankNode { id: [u8; 16] },
    /// A blank node whose identifier fits inline.
    SmallBlankNode(SmallString),
    /// A blank node identified by the hash of its identifier.
    BigBlankNode { id_id: StrHash },
    /// An `xsd:string` literal whose value fits inline.
    SmallStringLiteral(SmallString),
    /// An `xsd:string` literal identified by the hash of its value.
    BigStringLiteral { value_id: StrHash },
    /// A language-tagged literal: inline value, inline language tag.
    SmallSmallLangStringLiteral {
        value: SmallString,
        language: SmallString,
    },
    /// A language-tagged literal: inline value, hashed language tag.
    SmallBigLangStringLiteral {
        value: SmallString,
        language_id: StrHash,
    },
    /// A language-tagged literal: hashed value, inline language tag.
    BigSmallLangStringLiteral {
        value_id: StrHash,
        language: SmallString,
    },
    /// A language-tagged literal: hashed value, hashed language tag.
    BigBigLangStringLiteral {
        value_id: StrHash,
        language_id: StrHash,
    },
    /// A typed literal: inline value, inline datatype IRI.
    SmallSmallTypedLiteral {
        value: SmallString,
        datatype: SmallString,
    },
    /// A typed literal: inline value, hashed datatype IRI.
    SmallBigTypedLiteral {
        value: SmallString,
        datatype_id: StrHash,
    },
    /// A typed literal: hashed value, inline datatype IRI.
    BigSmallTypedLiteral {
        value_id: StrHash,
        datatype: SmallString,
    },
    /// A typed literal: hashed value, hashed datatype IRI.
    BigBigTypedLiteral {
        value_id: StrHash,
        datatype_id: StrHash,
    },
    /// A quoted triple whose three components are themselves encoded terms.
    QuotedTriple {
        subject: Box<EncodedTerm>,
        predicate: Box<EncodedTerm>,
        object: Box<EncodedTerm>,
    },
}
impl EncodedTerm {
/// Encodes a named node as the hash of its IRI string.
pub fn encode_named_node(node: &NamedNode) -> Self {
    let iri_id = StrHash::new(node.as_str());
    EncodedTerm::NamedNode { iri_id }
}
/// Encodes a borrowed named node as the hash of its IRI string.
pub fn encode_named_node_ref(node: NamedNodeRef<'_>) -> Self {
    let iri_id = StrHash::new(node.as_str());
    EncodedTerm::NamedNode { iri_id }
}
pub fn encode_blank_node(node: &BlankNode) -> Self {
let id_str = node.as_str();
if let Ok(bytes) = hex::decode(id_str) {
if bytes.len() == 16 {
let mut id = [0u8; 16];
id.copy_from_slice(&bytes);
return EncodedTerm::NumericalBlankNode { id };
}
}
if let Some(small_string) = SmallString::new(id_str) {
EncodedTerm::SmallBlankNode(small_string)
} else {
EncodedTerm::BigBlankNode {
id_id: StrHash::new(id_str),
}
}
}
pub fn encode_blank_node_ref(node: BlankNodeRef<'_>) -> Self {
let id_str = node.as_str();
if let Ok(bytes) = hex::decode(id_str) {
if bytes.len() == 16 {
let mut id = [0u8; 16];
id.copy_from_slice(&bytes);
return EncodedTerm::NumericalBlankNode { id };
}
}
if let Some(small_string) = SmallString::new(id_str) {
EncodedTerm::SmallBlankNode(small_string)
} else {
EncodedTerm::BigBlankNode {
id_id: StrHash::new(id_str),
}
}
}
pub fn encode_literal(literal: &Literal) -> Self {
let value = literal.value();
if let Some(language) = literal.language() {
match (SmallString::new(value), SmallString::new(language)) {
(Some(small_value), Some(small_lang)) => EncodedTerm::SmallSmallLangStringLiteral {
value: small_value,
language: small_lang,
},
(Some(small_value), None) => EncodedTerm::SmallBigLangStringLiteral {
value: small_value,
language_id: StrHash::new(language),
},
(None, Some(small_lang)) => EncodedTerm::BigSmallLangStringLiteral {
value_id: StrHash::new(value),
language: small_lang,
},
(None, None) => EncodedTerm::BigBigLangStringLiteral {
value_id: StrHash::new(value),
language_id: StrHash::new(language),
},
}
} else {
let datatype = literal.datatype();
let datatype_str = datatype.as_str();
if datatype_str == "http://www.w3.org/2001/XMLSchema#string" {
if let Some(small_value) = SmallString::new(value) {
EncodedTerm::SmallStringLiteral(small_value)
} else {
EncodedTerm::BigStringLiteral {
value_id: StrHash::new(value),
}
}
} else {
match (SmallString::new(value), SmallString::new(datatype_str)) {
(Some(small_value), Some(small_datatype)) => {
EncodedTerm::SmallSmallTypedLiteral {
value: small_value,
datatype: small_datatype,
}
}
(Some(small_value), None) => EncodedTerm::SmallBigTypedLiteral {
value: small_value,
datatype_id: StrHash::new(datatype_str),
},
(None, Some(small_datatype)) => EncodedTerm::BigSmallTypedLiteral {
value_id: StrHash::new(value),
datatype: small_datatype,
},
(None, None) => EncodedTerm::BigBigTypedLiteral {
value_id: StrHash::new(value),
datatype_id: StrHash::new(datatype_str),
},
}
}
}
}
pub fn encode_literal_ref(literal: LiteralRef<'_>) -> Self {
let value = literal.value();
if let Some(language) = literal.language() {
match (SmallString::new(value), SmallString::new(language)) {
(Some(small_value), Some(small_lang)) => EncodedTerm::SmallSmallLangStringLiteral {
value: small_value,
language: small_lang,
},
(Some(small_value), None) => EncodedTerm::SmallBigLangStringLiteral {
value: small_value,
language_id: StrHash::new(language),
},
(None, Some(small_lang)) => EncodedTerm::BigSmallLangStringLiteral {
value_id: StrHash::new(value),
language: small_lang,
},
(None, None) => EncodedTerm::BigBigLangStringLiteral {
value_id: StrHash::new(value),
language_id: StrHash::new(language),
},
}
} else {
let datatype = literal.datatype();
let datatype_str = datatype.as_str();
if datatype_str == "http://www.w3.org/2001/XMLSchema#string" {
if let Some(small_value) = SmallString::new(value) {
EncodedTerm::SmallStringLiteral(small_value)
} else {
EncodedTerm::BigStringLiteral {
value_id: StrHash::new(value),
}
}
} else {
match (SmallString::new(value), SmallString::new(datatype_str)) {
(Some(small_value), Some(small_datatype)) => {
EncodedTerm::SmallSmallTypedLiteral {
value: small_value,
datatype: small_datatype,
}
}
(Some(small_value), None) => EncodedTerm::SmallBigTypedLiteral {
value: small_value,
datatype_id: StrHash::new(datatype_str),
},
(None, Some(small_datatype)) => EncodedTerm::BigSmallTypedLiteral {
value_id: StrHash::new(value),
datatype: small_datatype,
},
(None, None) => EncodedTerm::BigBigTypedLiteral {
value_id: StrHash::new(value),
datatype_id: StrHash::new(datatype_str),
},
}
}
}
}
/// Always panics: variables have no encoded form.
///
/// # Panics
///
/// Panics unconditionally; the argument is never inspected.
pub fn encode_variable(_variable: &crate::model::Variable) -> Self {
    panic!("Variables cannot be encoded for storage - they are only used in queries")
}
/// Encodes a quoted triple by encoding its subject, predicate and object in that order.
///
/// Each component is cloned and converted to a `Term` before being passed to
/// `encode_term`.
///
/// # Panics
///
/// Panics if a component converts to `Term::Variable`, because `encode_term` panics on
/// variables.
pub fn encode_quoted_triple(quoted_triple: &crate::model::star::QuotedTriple) -> Self {
    let triple = quoted_triple.inner();
    let subject = Self::encode_term(&Term::from(triple.subject().clone()));
    let predicate = Self::encode_term(&Term::from(triple.predicate().clone()));
    let object = Self::encode_term(&Term::from(triple.object().clone()));
    EncodedTerm::QuotedTriple {
        subject: Box::new(subject),
        predicate: Box::new(predicate),
        object: Box::new(object),
    }
}
/// Encodes an owned term by dispatching on its variant.
///
/// # Panics
///
/// Panics with `Cannot encode variable in this context` when `term` is a variable.
pub fn encode_term(term: &Term) -> Self {
    match term {
        Term::NamedNode(named) => Self::encode_named_node(named),
        Term::BlankNode(blank) => Self::encode_blank_node(blank),
        Term::Literal(literal) => Self::encode_literal(literal),
        Term::Variable(_) => panic!("Cannot encode variable in this context"),
        Term::QuotedTriple(quoted) => Self::encode_quoted_triple(quoted),
    }
}
/// Encodes a borrowed term by dispatching on its variant.
///
/// The `Triple` arm is compiled only when the `rdf-star` feature is enabled.
///
/// # Panics
///
/// Panics when `term` is a variable, because `encode_variable` always panics.
pub fn encode_term_ref(term: TermRef<'_>) -> Self {
    match term {
        TermRef::NamedNode(named) => Self::encode_named_node_ref(named),
        TermRef::BlankNode(blank) => Self::encode_blank_node_ref(blank),
        TermRef::Literal(literal) => Self::encode_literal_ref(literal),
        TermRef::Variable(variable) => Self::encode_variable(variable),
        #[cfg(feature = "rdf-star")]
        TermRef::Triple(triple) => Self::encode_quoted_triple(triple),
    }
}
pub fn type_discriminant(&self) -> u8 {
match self {
EncodedTerm::DefaultGraph => 0,
EncodedTerm::NamedNode { .. } => 1,
EncodedTerm::NumericalBlankNode { .. } => 2,
EncodedTerm::SmallBlankNode(_) => 3,
EncodedTerm::BigBlankNode { .. } => 4,
EncodedTerm::SmallStringLiteral(_) => 5,
EncodedTerm::BigStringLiteral { .. } => 6,
EncodedTerm::SmallSmallLangStringLiteral { .. } => 7,
EncodedTerm::SmallBigLangStringLiteral { .. } => 8,
EncodedTerm::BigSmallLangStringLiteral { .. } => 9,
EncodedTerm::BigBigLangStringLiteral { .. } => 10,
EncodedTerm::SmallSmallTypedLiteral { .. } => 11,
EncodedTerm::SmallBigTypedLiteral { .. } => 12,
EncodedTerm::BigSmallTypedLiteral { .. } => 13,
EncodedTerm::BigBigTypedLiteral { .. } => 14,
EncodedTerm::QuotedTriple { .. } => 15,
}
}
pub fn is_named_node(&self) -> bool {
matches!(self, EncodedTerm::NamedNode { .. })
}
pub fn is_blank_node(&self) -> bool {
matches!(
self,
EncodedTerm::NumericalBlankNode { .. }
| EncodedTerm::SmallBlankNode(_)
| EncodedTerm::BigBlankNode { .. }
)
}
pub fn is_literal(&self) -> bool {
matches!(
self,
EncodedTerm::SmallStringLiteral(_)
| EncodedTerm::BigStringLiteral { .. }
| EncodedTerm::SmallSmallLangStringLiteral { .. }
| EncodedTerm::SmallBigLangStringLiteral { .. }
| EncodedTerm::BigSmallLangStringLiteral { .. }
| EncodedTerm::BigBigLangStringLiteral { .. }
| EncodedTerm::SmallSmallTypedLiteral { .. }
| EncodedTerm::SmallBigTypedLiteral { .. }
| EncodedTerm::BigSmallTypedLiteral { .. }
| EncodedTerm::BigBigTypedLiteral { .. }
)
}
pub fn is_quoted_triple(&self) -> bool {
matches!(self, EncodedTerm::QuotedTriple { .. })
}
pub fn size_hint(&self) -> usize {
match self {
EncodedTerm::DefaultGraph => 1,
EncodedTerm::NamedNode { .. } => 1 + 16,
EncodedTerm::NumericalBlankNode { .. } => 1 + 16,
EncodedTerm::SmallBlankNode(_) => 1 + 16 + 1,
EncodedTerm::BigBlankNode { .. } => 1 + 16,
EncodedTerm::SmallStringLiteral(_) => 1 + 16 + 1,
EncodedTerm::BigStringLiteral { .. } => 1 + 16,
EncodedTerm::SmallSmallLangStringLiteral { .. } => 1 + 16 + 1 + 16 + 1,
EncodedTerm::SmallBigLangStringLiteral { .. } => 1 + 16 + 1 + 16,
EncodedTerm::BigSmallLangStringLiteral { .. } => 1 + 16 + 16 + 1,
EncodedTerm::BigBigLangStringLiteral { .. } => 1 + 16 + 16,
EncodedTerm::SmallSmallTypedLiteral { .. } => 1 + 16 + 1 + 16 + 1,
EncodedTerm::SmallBigTypedLiteral { .. } => 1 + 16 + 1 + 16,
EncodedTerm::BigSmallTypedLiteral { .. } => 1 + 16 + 16 + 1,
EncodedTerm::BigBigTypedLiteral { .. } => 1 + 16 + 16,
EncodedTerm::QuotedTriple {
subject,
predicate,
object,
} => 1 + subject.size_hint() + predicate.size_hint() + object.size_hint(),
}
}
}
/// A triple whose subject, predicate and object are encoded terms.
#[derive(Debug, Clone, Hash, PartialEq, Eq, Serialize, Deserialize)]
pub struct EncodedTriple {
    /// Encoded subject.
    pub subject: EncodedTerm,
    /// Encoded predicate.
    pub predicate: EncodedTerm,
    /// Encoded object.
    pub object: EncodedTerm,
}
impl EncodedTriple {
pub fn new(subject: EncodedTerm, predicate: EncodedTerm, object: EncodedTerm) -> Self {
Self {
subject,
predicate,
object,
}
}
pub fn size_hint(&self) -> usize {
self.subject.size_hint() + self.predicate.size_hint() + self.object.size_hint()
}
}
/// A quad: an encoded subject, predicate and object plus an encoded graph name.
#[derive(Debug, Clone, Hash, PartialEq, Eq, Serialize, Deserialize)]
pub struct EncodedQuad {
    /// Encoded subject.
    pub subject: EncodedTerm,
    /// Encoded predicate.
    pub predicate: EncodedTerm,
    /// Encoded object.
    pub object: EncodedTerm,
    /// Encoded graph name.
    pub graph_name: EncodedTerm,
}
impl EncodedQuad {
pub fn new(
subject: EncodedTerm,
predicate: EncodedTerm,
object: EncodedTerm,
graph_name: EncodedTerm,
) -> Self {
Self {
subject,
predicate,
object,
graph_name,
}
}
pub fn size_hint(&self) -> usize {
self.subject.size_hint()
+ self.predicate.size_hint()
+ self.object.size_hint()
+ self.graph_name.size_hint()
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::model::*;
    use crate::vocab::xsd;

    /// Equal strings hash equally; different strings hash differently.
    #[test]
    fn test_str_hash() {
        let first = StrHash::new("http://example.org/test");
        let repeat = StrHash::new("http://example.org/test");
        let other = StrHash::new("http://example.org/different");

        assert_eq!(first, repeat);
        assert_ne!(first, other);
    }

    /// Short and empty strings are stored inline; an over-long string is rejected.
    #[test]
    fn test_small_string() {
        let short = SmallString::new("test").expect("construction should succeed");
        assert_eq!(short.as_str(), "test");
        assert_eq!(short.len(), 4);
        assert!(!short.is_empty());

        let blank = SmallString::new("").expect("construction should succeed");
        assert!(blank.is_empty());

        let oversized = "this is a very long string that exceeds the maximum inline length";
        assert!(SmallString::new(oversized).is_none());
    }

    /// A named node is classified as a named node only.
    #[test]
    fn test_encode_named_node() {
        let iri = NamedNode::new("http://example.org/test").expect("valid IRI");
        let term = EncodedTerm::encode_named_node(&iri);

        assert!(term.is_named_node());
        assert!(!term.is_blank_node());
        assert!(!term.is_literal());
    }

    /// A blank node is classified as a blank node only.
    #[test]
    fn test_encode_blank_node() {
        let blank = BlankNode::new("test").expect("valid blank node id");
        let term = EncodedTerm::encode_blank_node(&blank);

        assert!(!term.is_named_node());
        assert!(term.is_blank_node());
        assert!(!term.is_literal());
    }

    /// Plain, language-tagged and typed literals select the expected variants.
    #[test]
    fn test_encode_literal() {
        let plain = EncodedTerm::encode_literal(&Literal::new("test"));
        assert!(plain.is_literal());
        assert!(matches!(plain, EncodedTerm::SmallStringLiteral(_)));

        let tagged_literal =
            Literal::new_lang("test", "en").expect("construction should succeed");
        let tagged = EncodedTerm::encode_literal(&tagged_literal);
        assert!(tagged.is_literal());
        assert!(matches!(
            tagged,
            EncodedTerm::SmallSmallLangStringLiteral { .. }
        ));

        let typed = EncodedTerm::encode_literal(&Literal::new_typed("42", xsd::INTEGER.clone()));
        assert!(typed.is_literal());
        assert!(matches!(typed, EncodedTerm::SmallBigTypedLiteral { .. }));
    }

    /// A triple built from encoded terms reports a non-zero size hint.
    #[test]
    fn test_encoded_triple() {
        let subject_iri = NamedNode::new("http://example.org/s").expect("valid IRI");
        let predicate_iri = NamedNode::new("http://example.org/p").expect("valid IRI");

        let triple = EncodedTriple::new(
            EncodedTerm::encode_named_node(&subject_iri),
            EncodedTerm::encode_named_node(&predicate_iri),
            EncodedTerm::encode_literal(&Literal::new("test")),
        );

        assert!(triple.size_hint() > 0);
    }

    /// Discriminants match the documented tags for three representative variants.
    #[test]
    fn test_type_discriminant() {
        let named = EncodedTerm::NamedNode {
            iri_id: StrHash::new("http://example.org/test"),
        };
        let blank = EncodedTerm::SmallBlankNode(
            SmallString::new("test").expect("construction should succeed"),
        );
        let string = EncodedTerm::SmallStringLiteral(
            SmallString::new("test").expect("construction should succeed"),
        );

        assert_eq!(named.type_discriminant(), 1);
        assert_eq!(blank.type_discriminant(), 3);
        assert_eq!(string.type_discriminant(), 5);
    }
}