1use crate::model::{
7 BlankNode, BlankNodeRef, Literal, LiteralRef, NamedNode, NamedNodeRef, Term, TermRef,
8};
9use serde::{Deserialize, Serialize};
10use std::collections::hash_map::DefaultHasher;
11use std::fmt::{Debug, Display};
12use std::hash::{Hash, Hasher};
13
14#[derive(Eq, PartialEq, Debug, Clone, Copy, Serialize, Deserialize)]
16pub struct StrHash {
17 hash: [u8; 16],
18}
19
20impl StrHash {
21 pub fn new(value: &str) -> Self {
23 let mut hasher = DefaultHasher::new();
24 hasher.write(value.as_bytes());
25 let hash_value = hasher.finish();
26
27 let mut full_hash = [0u8; 16];
29 full_hash[0..8].copy_from_slice(&hash_value.to_be_bytes());
30
31 let mut hasher2 = DefaultHasher::new();
33 hasher2.write(&[0xDE, 0xAD, 0xBE, 0xEF]); hasher2.write(value.as_bytes());
35 let hash_value2 = hasher2.finish();
36 full_hash[8..16].copy_from_slice(&hash_value2.to_be_bytes());
37
38 Self { hash: full_hash }
39 }
40
41 #[inline]
43 pub fn from_be_bytes(hash: [u8; 16]) -> Self {
44 Self { hash }
45 }
46
47 #[inline]
49 pub fn to_be_bytes(self) -> [u8; 16] {
50 self.hash
51 }
52}
53
54impl Hash for StrHash {
55 #[inline]
56 fn hash<H: Hasher>(&self, state: &mut H) {
57 let hash_val = u64::from_be_bytes([
59 self.hash[0],
60 self.hash[1],
61 self.hash[2],
62 self.hash[3],
63 self.hash[4],
64 self.hash[5],
65 self.hash[6],
66 self.hash[7],
67 ]);
68 state.write_u64(hash_val);
69 }
70}
71
72impl Display for StrHash {
73 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
74 write!(f, "StrHash({})", hex::encode(self.hash))
75 }
76}
77
78#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
81pub struct SmallString {
82 data: [u8; 16],
83 len: u8,
84}
85
86impl SmallString {
87 const MAX_INLINE_LEN: usize = 15;
88
89 pub fn new(s: &str) -> Option<Self> {
91 if s.len() > Self::MAX_INLINE_LEN {
92 return None;
93 }
94
95 let mut data = [0u8; 16];
96 data[..s.len()].copy_from_slice(s.as_bytes());
97
98 Some(SmallString {
99 data,
100 len: s.len() as u8,
101 })
102 }
103
104 pub fn as_str(&self) -> &str {
106 unsafe { std::str::from_utf8_unchecked(&self.data[..self.len as usize]) }
107 }
108
109 pub fn len(&self) -> usize {
111 self.len as usize
112 }
113
114 pub fn is_empty(&self) -> bool {
116 self.len == 0
117 }
118}
119
120impl Display for SmallString {
121 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
122 write!(f, "{}", self.as_str())
123 }
124}
125
126impl From<&str> for SmallString {
127 fn from(s: &str) -> Self {
128 Self::new(s).expect("String too long for SmallString")
129 }
130}
131
132#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
134pub enum EncodedTerm {
135 DefaultGraph,
137
138 NamedNode { iri_id: StrHash },
140
141 NumericalBlankNode { id: [u8; 16] },
143
144 SmallBlankNode(SmallString),
146
147 BigBlankNode { id_id: StrHash },
149
150 SmallStringLiteral(SmallString),
152
153 BigStringLiteral { value_id: StrHash },
155
156 SmallSmallLangStringLiteral {
158 value: SmallString,
159 language: SmallString,
160 },
161
162 SmallBigLangStringLiteral {
164 value: SmallString,
165 language_id: StrHash,
166 },
167
168 BigSmallLangStringLiteral {
170 value_id: StrHash,
171 language: SmallString,
172 },
173
174 BigBigLangStringLiteral {
176 value_id: StrHash,
177 language_id: StrHash,
178 },
179
180 SmallSmallTypedLiteral {
182 value: SmallString,
183 datatype: SmallString,
184 },
185
186 SmallBigTypedLiteral {
188 value: SmallString,
189 datatype_id: StrHash,
190 },
191
192 BigSmallTypedLiteral {
194 value_id: StrHash,
195 datatype: SmallString,
196 },
197
198 BigBigTypedLiteral {
200 value_id: StrHash,
201 datatype_id: StrHash,
202 },
203
204 QuotedTriple {
206 subject: Box<EncodedTerm>,
207 predicate: Box<EncodedTerm>,
208 object: Box<EncodedTerm>,
209 },
210}
211
212impl EncodedTerm {
213 pub fn encode_named_node(node: &NamedNode) -> Self {
215 EncodedTerm::NamedNode {
216 iri_id: StrHash::new(node.as_str()),
217 }
218 }
219
220 pub fn encode_named_node_ref(node: NamedNodeRef<'_>) -> Self {
222 EncodedTerm::NamedNode {
223 iri_id: StrHash::new(node.as_str()),
224 }
225 }
226
227 pub fn encode_blank_node(node: &BlankNode) -> Self {
229 let id_str = node.as_str();
230
231 if let Ok(bytes) = hex::decode(id_str) {
233 if bytes.len() == 16 {
234 let mut id = [0u8; 16];
235 id.copy_from_slice(&bytes);
236 return EncodedTerm::NumericalBlankNode { id };
237 }
238 }
239
240 if let Some(small_string) = SmallString::new(id_str) {
242 EncodedTerm::SmallBlankNode(small_string)
243 } else {
244 EncodedTerm::BigBlankNode {
245 id_id: StrHash::new(id_str),
246 }
247 }
248 }
249
250 pub fn encode_blank_node_ref(node: BlankNodeRef<'_>) -> Self {
252 let id_str = node.as_str();
253
254 if let Ok(bytes) = hex::decode(id_str) {
256 if bytes.len() == 16 {
257 let mut id = [0u8; 16];
258 id.copy_from_slice(&bytes);
259 return EncodedTerm::NumericalBlankNode { id };
260 }
261 }
262
263 if let Some(small_string) = SmallString::new(id_str) {
265 EncodedTerm::SmallBlankNode(small_string)
266 } else {
267 EncodedTerm::BigBlankNode {
268 id_id: StrHash::new(id_str),
269 }
270 }
271 }
272
273 pub fn encode_literal(literal: &Literal) -> Self {
275 let value = literal.value();
276
277 if let Some(language) = literal.language() {
278 match (SmallString::new(value), SmallString::new(language)) {
280 (Some(small_value), Some(small_lang)) => EncodedTerm::SmallSmallLangStringLiteral {
281 value: small_value,
282 language: small_lang,
283 },
284 (Some(small_value), None) => EncodedTerm::SmallBigLangStringLiteral {
285 value: small_value,
286 language_id: StrHash::new(language),
287 },
288 (None, Some(small_lang)) => EncodedTerm::BigSmallLangStringLiteral {
289 value_id: StrHash::new(value),
290 language: small_lang,
291 },
292 (None, None) => EncodedTerm::BigBigLangStringLiteral {
293 value_id: StrHash::new(value),
294 language_id: StrHash::new(language),
295 },
296 }
297 } else {
298 let datatype = literal.datatype();
299 let datatype_str = datatype.as_str();
300
301 if datatype_str == "http://www.w3.org/2001/XMLSchema#string" {
303 if let Some(small_value) = SmallString::new(value) {
304 EncodedTerm::SmallStringLiteral(small_value)
305 } else {
306 EncodedTerm::BigStringLiteral {
307 value_id: StrHash::new(value),
308 }
309 }
310 } else {
311 match (SmallString::new(value), SmallString::new(datatype_str)) {
313 (Some(small_value), Some(small_datatype)) => {
314 EncodedTerm::SmallSmallTypedLiteral {
315 value: small_value,
316 datatype: small_datatype,
317 }
318 }
319 (Some(small_value), None) => EncodedTerm::SmallBigTypedLiteral {
320 value: small_value,
321 datatype_id: StrHash::new(datatype_str),
322 },
323 (None, Some(small_datatype)) => EncodedTerm::BigSmallTypedLiteral {
324 value_id: StrHash::new(value),
325 datatype: small_datatype,
326 },
327 (None, None) => EncodedTerm::BigBigTypedLiteral {
328 value_id: StrHash::new(value),
329 datatype_id: StrHash::new(datatype_str),
330 },
331 }
332 }
333 }
334 }
335
336 pub fn encode_literal_ref(literal: LiteralRef<'_>) -> Self {
338 let value = literal.value();
339
340 if let Some(language) = literal.language() {
341 match (SmallString::new(value), SmallString::new(language)) {
343 (Some(small_value), Some(small_lang)) => EncodedTerm::SmallSmallLangStringLiteral {
344 value: small_value,
345 language: small_lang,
346 },
347 (Some(small_value), None) => EncodedTerm::SmallBigLangStringLiteral {
348 value: small_value,
349 language_id: StrHash::new(language),
350 },
351 (None, Some(small_lang)) => EncodedTerm::BigSmallLangStringLiteral {
352 value_id: StrHash::new(value),
353 language: small_lang,
354 },
355 (None, None) => EncodedTerm::BigBigLangStringLiteral {
356 value_id: StrHash::new(value),
357 language_id: StrHash::new(language),
358 },
359 }
360 } else {
361 let datatype = literal.datatype();
362 let datatype_str = datatype.as_str();
363
364 if datatype_str == "http://www.w3.org/2001/XMLSchema#string" {
366 if let Some(small_value) = SmallString::new(value) {
367 EncodedTerm::SmallStringLiteral(small_value)
368 } else {
369 EncodedTerm::BigStringLiteral {
370 value_id: StrHash::new(value),
371 }
372 }
373 } else {
374 match (SmallString::new(value), SmallString::new(datatype_str)) {
376 (Some(small_value), Some(small_datatype)) => {
377 EncodedTerm::SmallSmallTypedLiteral {
378 value: small_value,
379 datatype: small_datatype,
380 }
381 }
382 (Some(small_value), None) => EncodedTerm::SmallBigTypedLiteral {
383 value: small_value,
384 datatype_id: StrHash::new(datatype_str),
385 },
386 (None, Some(small_datatype)) => EncodedTerm::BigSmallTypedLiteral {
387 value_id: StrHash::new(value),
388 datatype: small_datatype,
389 },
390 (None, None) => EncodedTerm::BigBigTypedLiteral {
391 value_id: StrHash::new(value),
392 datatype_id: StrHash::new(datatype_str),
393 },
394 }
395 }
396 }
397 }
398
399 pub fn encode_variable(_variable: &crate::model::Variable) -> Self {
401 panic!("Variables cannot be encoded for storage - they are only used in queries")
402 }
403
404 pub fn encode_quoted_triple(quoted_triple: &crate::model::star::QuotedTriple) -> Self {
406 let inner = quoted_triple.inner();
407 EncodedTerm::QuotedTriple {
408 subject: Box::new(Self::encode_term(&Term::from(inner.subject().clone()))),
409 predicate: Box::new(Self::encode_term(&Term::from(inner.predicate().clone()))),
410 object: Box::new(Self::encode_term(&Term::from(inner.object().clone()))),
411 }
412 }
413
414 pub fn encode_term(term: &Term) -> Self {
416 match term {
417 Term::NamedNode(n) => Self::encode_named_node(n),
418 Term::BlankNode(b) => Self::encode_blank_node(b),
419 Term::Literal(l) => Self::encode_literal(l),
420 Term::Variable(_) => panic!("Cannot encode variable in this context"),
421 Term::QuotedTriple(qt) => Self::encode_quoted_triple(qt),
422 }
423 }
424
425 pub fn encode_term_ref(term: TermRef<'_>) -> Self {
427 match term {
428 TermRef::NamedNode(n) => Self::encode_named_node_ref(n),
429 TermRef::BlankNode(b) => Self::encode_blank_node_ref(b),
430 TermRef::Literal(l) => Self::encode_literal_ref(l),
431 TermRef::Variable(v) => Self::encode_variable(v),
432 #[cfg(feature = "rdf-star")]
433 TermRef::Triple(qt) => Self::encode_quoted_triple(qt),
434 }
435 }
436
437 pub fn type_discriminant(&self) -> u8 {
439 match self {
440 EncodedTerm::DefaultGraph => 0,
441 EncodedTerm::NamedNode { .. } => 1,
442 EncodedTerm::NumericalBlankNode { .. } => 2,
443 EncodedTerm::SmallBlankNode(_) => 3,
444 EncodedTerm::BigBlankNode { .. } => 4,
445 EncodedTerm::SmallStringLiteral(_) => 5,
446 EncodedTerm::BigStringLiteral { .. } => 6,
447 EncodedTerm::SmallSmallLangStringLiteral { .. } => 7,
448 EncodedTerm::SmallBigLangStringLiteral { .. } => 8,
449 EncodedTerm::BigSmallLangStringLiteral { .. } => 9,
450 EncodedTerm::BigBigLangStringLiteral { .. } => 10,
451 EncodedTerm::SmallSmallTypedLiteral { .. } => 11,
452 EncodedTerm::SmallBigTypedLiteral { .. } => 12,
453 EncodedTerm::BigSmallTypedLiteral { .. } => 13,
454 EncodedTerm::BigBigTypedLiteral { .. } => 14,
455 EncodedTerm::QuotedTriple { .. } => 15,
456 }
457 }
458
459 pub fn is_named_node(&self) -> bool {
461 matches!(self, EncodedTerm::NamedNode { .. })
462 }
463
464 pub fn is_blank_node(&self) -> bool {
466 matches!(
467 self,
468 EncodedTerm::NumericalBlankNode { .. }
469 | EncodedTerm::SmallBlankNode(_)
470 | EncodedTerm::BigBlankNode { .. }
471 )
472 }
473
474 pub fn is_literal(&self) -> bool {
476 matches!(
477 self,
478 EncodedTerm::SmallStringLiteral(_)
479 | EncodedTerm::BigStringLiteral { .. }
480 | EncodedTerm::SmallSmallLangStringLiteral { .. }
481 | EncodedTerm::SmallBigLangStringLiteral { .. }
482 | EncodedTerm::BigSmallLangStringLiteral { .. }
483 | EncodedTerm::BigBigLangStringLiteral { .. }
484 | EncodedTerm::SmallSmallTypedLiteral { .. }
485 | EncodedTerm::SmallBigTypedLiteral { .. }
486 | EncodedTerm::BigSmallTypedLiteral { .. }
487 | EncodedTerm::BigBigTypedLiteral { .. }
488 )
489 }
490
491 pub fn is_quoted_triple(&self) -> bool {
493 matches!(self, EncodedTerm::QuotedTriple { .. })
494 }
495
496 pub fn size_hint(&self) -> usize {
498 match self {
499 EncodedTerm::DefaultGraph => 1,
500 EncodedTerm::NamedNode { .. } => 1 + 16,
501 EncodedTerm::NumericalBlankNode { .. } => 1 + 16,
502 EncodedTerm::SmallBlankNode(_) => 1 + 16 + 1,
503 EncodedTerm::BigBlankNode { .. } => 1 + 16,
504 EncodedTerm::SmallStringLiteral(_) => 1 + 16 + 1,
505 EncodedTerm::BigStringLiteral { .. } => 1 + 16,
506 EncodedTerm::SmallSmallLangStringLiteral { .. } => 1 + 16 + 1 + 16 + 1,
507 EncodedTerm::SmallBigLangStringLiteral { .. } => 1 + 16 + 1 + 16,
508 EncodedTerm::BigSmallLangStringLiteral { .. } => 1 + 16 + 16 + 1,
509 EncodedTerm::BigBigLangStringLiteral { .. } => 1 + 16 + 16,
510 EncodedTerm::SmallSmallTypedLiteral { .. } => 1 + 16 + 1 + 16 + 1,
511 EncodedTerm::SmallBigTypedLiteral { .. } => 1 + 16 + 1 + 16,
512 EncodedTerm::BigSmallTypedLiteral { .. } => 1 + 16 + 16 + 1,
513 EncodedTerm::BigBigTypedLiteral { .. } => 1 + 16 + 16,
514 EncodedTerm::QuotedTriple {
515 subject,
516 predicate,
517 object,
518 } => 1 + subject.size_hint() + predicate.size_hint() + object.size_hint(),
519 }
520 }
521}
522
523#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
525pub struct EncodedTriple {
526 pub subject: EncodedTerm,
527 pub predicate: EncodedTerm,
528 pub object: EncodedTerm,
529}
530
531impl EncodedTriple {
532 pub fn new(subject: EncodedTerm, predicate: EncodedTerm, object: EncodedTerm) -> Self {
534 Self {
535 subject,
536 predicate,
537 object,
538 }
539 }
540
541 pub fn size_hint(&self) -> usize {
543 self.subject.size_hint() + self.predicate.size_hint() + self.object.size_hint()
544 }
545}
546
547#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
549pub struct EncodedQuad {
550 pub subject: EncodedTerm,
551 pub predicate: EncodedTerm,
552 pub object: EncodedTerm,
553 pub graph_name: EncodedTerm,
554}
555
556impl EncodedQuad {
557 pub fn new(
559 subject: EncodedTerm,
560 predicate: EncodedTerm,
561 object: EncodedTerm,
562 graph_name: EncodedTerm,
563 ) -> Self {
564 Self {
565 subject,
566 predicate,
567 object,
568 graph_name,
569 }
570 }
571
572 pub fn size_hint(&self) -> usize {
574 self.subject.size_hint()
575 + self.predicate.size_hint()
576 + self.object.size_hint()
577 + self.graph_name.size_hint()
578 }
579}
580
581#[cfg(test)]
582mod tests {
583 use super::*;
584 use crate::model::*;
585 use crate::vocab::xsd;
586
587 #[test]
588 fn test_str_hash() {
589 let hash1 = StrHash::new("http://example.org/test");
590 let hash2 = StrHash::new("http://example.org/test");
591 let hash3 = StrHash::new("http://example.org/different");
592
593 assert_eq!(hash1, hash2);
594 assert_ne!(hash1, hash3);
595 }
596
597 #[test]
598 fn test_small_string() {
599 let small = SmallString::new("test").expect("construction should succeed");
600 assert_eq!(small.as_str(), "test");
601 assert_eq!(small.len(), 4);
602 assert!(!small.is_empty());
603
604 let empty = SmallString::new("").expect("construction should succeed");
605 assert!(empty.is_empty());
606
607 let long_str = "this is a very long string that exceeds the maximum inline length";
609 assert!(SmallString::new(long_str).is_none());
610 }
611
612 #[test]
613 fn test_encode_named_node() {
614 let node = NamedNode::new("http://example.org/test").expect("valid IRI");
615 let encoded = EncodedTerm::encode_named_node(&node);
616
617 assert!(encoded.is_named_node());
618 assert!(!encoded.is_blank_node());
619 assert!(!encoded.is_literal());
620 }
621
622 #[test]
623 fn test_encode_blank_node() {
624 let node = BlankNode::new("test").expect("valid blank node id");
625 let encoded = EncodedTerm::encode_blank_node(&node);
626
627 assert!(!encoded.is_named_node());
628 assert!(encoded.is_blank_node());
629 assert!(!encoded.is_literal());
630 }
631
632 #[test]
633 fn test_encode_literal() {
634 let literal = Literal::new("test");
636 let encoded = EncodedTerm::encode_literal(&literal);
637 assert!(encoded.is_literal());
638 assert!(matches!(encoded, EncodedTerm::SmallStringLiteral(_)));
639
640 let literal = Literal::new_lang("test", "en").expect("construction should succeed");
642 let encoded = EncodedTerm::encode_literal(&literal);
643 assert!(encoded.is_literal());
644 assert!(matches!(
645 encoded,
646 EncodedTerm::SmallSmallLangStringLiteral { .. }
647 ));
648
649 let literal = Literal::new_typed("42", xsd::INTEGER.clone());
651 let encoded = EncodedTerm::encode_literal(&literal);
652 assert!(encoded.is_literal());
653 assert!(matches!(encoded, EncodedTerm::SmallBigTypedLiteral { .. }));
654 }
655
656 #[test]
657 fn test_encoded_triple() {
658 let subject = EncodedTerm::encode_named_node(
659 &NamedNode::new("http://example.org/s").expect("valid IRI"),
660 );
661 let predicate = EncodedTerm::encode_named_node(
662 &NamedNode::new("http://example.org/p").expect("valid IRI"),
663 );
664 let object = EncodedTerm::encode_literal(&Literal::new("test"));
665
666 let triple = EncodedTriple::new(subject, predicate, object);
667 assert!(triple.size_hint() > 0);
668 }
669
670 #[test]
671 fn test_type_discriminant() {
672 let named_node = EncodedTerm::NamedNode {
673 iri_id: StrHash::new("http://example.org/test"),
674 };
675 let blank_node = EncodedTerm::SmallBlankNode(
676 SmallString::new("test").expect("construction should succeed"),
677 );
678 let literal = EncodedTerm::SmallStringLiteral(
679 SmallString::new("test").expect("construction should succeed"),
680 );
681
682 assert_eq!(named_node.type_discriminant(), 1);
683 assert_eq!(blank_node.type_discriminant(), 3);
684 assert_eq!(literal.type_discriminant(), 5);
685 }
686}