1use crate::store::encoding::{EncodedQuad, EncodedTerm, SmallString, StrHash};
7use crate::OxirsError;
8use std::io::{Cursor, Read};
9use std::mem::size_of;
10
/// Size in bytes of one type-tag byte followed by two `StrHash` values.
///
/// NOTE(review): a quoted triple is written as a tag byte followed by three
/// nested terms (see `encode_term`), so it can be larger than this value —
/// confirm callers only rely on this bound for non-nested terms.
pub const WRITTEN_TERM_MAX_SIZE: usize = size_of::<u8>() + 2 * size_of::<StrHash>();
13
// One-byte type tags. `encode_term` writes exactly one of these as the first
// byte of every encoded term, and `decode_term` dispatches on it.

/// Tag for the default graph; no payload follows.
const TYPE_DEFAULT_GRAPH: u8 = 0;
/// Tag for a named node; followed by the hash of its IRI.
const TYPE_NAMED_NODE_ID: u8 = 1;
/// Tag for a numerical blank node; followed by its 16 raw ID bytes.
const TYPE_NUMERICAL_BLANK_NODE_ID: u8 = 8;
/// Tag for a blank node whose ID is stored inline as a small string.
const TYPE_SMALL_BLANK_NODE_ID: u8 = 9;
/// Tag for a blank node whose ID is stored as a string hash.
const TYPE_BIG_BLANK_NODE_ID: u8 = 10;
/// Tag for a simple literal whose value is stored inline as a small string.
const TYPE_SMALL_STRING_LITERAL: u8 = 16;
/// Tag for a simple literal whose value is stored as a string hash.
const TYPE_BIG_STRING_LITERAL: u8 = 17;
/// Tag for a language-tagged literal: inline value, inline language.
const TYPE_SMALL_SMALL_LANG_STRING_LITERAL: u8 = 20;
/// Tag for a language-tagged literal: inline value, hashed language.
const TYPE_SMALL_BIG_LANG_STRING_LITERAL: u8 = 21;
/// Tag for a language-tagged literal: hashed value, inline language.
const TYPE_BIG_SMALL_LANG_STRING_LITERAL: u8 = 22;
/// Tag for a language-tagged literal: hashed value, hashed language.
const TYPE_BIG_BIG_LANG_STRING_LITERAL: u8 = 23;
/// Tag for a typed literal: inline value, inline datatype.
const TYPE_SMALL_SMALL_TYPED_LITERAL: u8 = 24;
/// Tag for a typed literal: inline value, hashed datatype.
const TYPE_SMALL_BIG_TYPED_LITERAL: u8 = 25;
/// Tag for a typed literal: hashed value, inline datatype.
const TYPE_BIG_SMALL_TYPED_LITERAL: u8 = 26;
/// Tag for a typed literal: hashed value, hashed datatype.
const TYPE_BIG_BIG_TYPED_LITERAL: u8 = 27;
/// Tag for a quoted triple; followed by its encoded subject, predicate and object.
const TYPE_QUOTED_TRIPLE: u8 = 30;
31
/// Selects the order in which the four terms of a quad are written to, and
/// read back from, a byte buffer.
///
/// Each variant name spells the term order: `S` = subject, `P` = predicate,
/// `O` = object, `G` = graph name.
#[derive(Clone, Copy, Debug)]
pub enum QuadEncoding {
    /// Subject, predicate, object, graph name.
    Spog,
    /// Predicate, object, subject, graph name.
    Posg,
    /// Object, subject, predicate, graph name.
    Ospg,
    /// Graph name, subject, predicate, object.
    Gspo,
    /// Graph name, predicate, object, subject.
    Gpos,
    /// Graph name, object, subject, predicate.
    Gosp,
}
48
49impl QuadEncoding {
50 pub fn decode(self, buffer: &[u8]) -> Result<EncodedQuad, OxirsError> {
52 let mut cursor = Cursor::new(buffer);
53 match self {
54 Self::Spog => decode_spog_quad(&mut cursor),
55 Self::Posg => decode_posg_quad(&mut cursor),
56 Self::Ospg => decode_ospg_quad(&mut cursor),
57 Self::Gspo => decode_gspo_quad(&mut cursor),
58 Self::Gpos => decode_gpos_quad(&mut cursor),
59 Self::Gosp => decode_gosp_quad(&mut cursor),
60 }
61 }
62
63 pub fn encode(self, quad: &EncodedQuad, buffer: &mut Vec<u8>) -> Result<(), OxirsError> {
65 match self {
66 Self::Spog => encode_spog_quad(quad, buffer),
67 Self::Posg => encode_posg_quad(quad, buffer),
68 Self::Ospg => encode_ospg_quad(quad, buffer),
69 Self::Gspo => encode_gspo_quad(quad, buffer),
70 Self::Gpos => encode_gpos_quad(quad, buffer),
71 Self::Gosp => encode_gosp_quad(quad, buffer),
72 }
73 }
74}
75
76pub fn encode_term(term: &EncodedTerm, buffer: &mut Vec<u8>) -> Result<(), OxirsError> {
78 match term {
79 EncodedTerm::DefaultGraph => {
80 buffer.push(TYPE_DEFAULT_GRAPH);
81 }
82 EncodedTerm::NamedNode { iri_id } => {
83 buffer.push(TYPE_NAMED_NODE_ID);
84 buffer.extend_from_slice(&iri_id.to_be_bytes());
85 }
86 EncodedTerm::NumericalBlankNode { id } => {
87 buffer.push(TYPE_NUMERICAL_BLANK_NODE_ID);
88 buffer.extend_from_slice(id);
89 }
90 EncodedTerm::SmallBlankNode(id) => {
91 buffer.push(TYPE_SMALL_BLANK_NODE_ID);
92 encode_small_string(id, buffer);
93 }
94 EncodedTerm::BigBlankNode { id_id } => {
95 buffer.push(TYPE_BIG_BLANK_NODE_ID);
96 buffer.extend_from_slice(&id_id.to_be_bytes());
97 }
98 EncodedTerm::SmallStringLiteral(value) => {
99 buffer.push(TYPE_SMALL_STRING_LITERAL);
100 encode_small_string(value, buffer);
101 }
102 EncodedTerm::BigStringLiteral { value_id } => {
103 buffer.push(TYPE_BIG_STRING_LITERAL);
104 buffer.extend_from_slice(&value_id.to_be_bytes());
105 }
106 EncodedTerm::SmallSmallLangStringLiteral { value, language } => {
107 buffer.push(TYPE_SMALL_SMALL_LANG_STRING_LITERAL);
108 encode_small_string(value, buffer);
109 encode_small_string(language, buffer);
110 }
111 EncodedTerm::SmallBigLangStringLiteral { value, language_id } => {
112 buffer.push(TYPE_SMALL_BIG_LANG_STRING_LITERAL);
113 encode_small_string(value, buffer);
114 buffer.extend_from_slice(&language_id.to_be_bytes());
115 }
116 EncodedTerm::BigSmallLangStringLiteral { value_id, language } => {
117 buffer.push(TYPE_BIG_SMALL_LANG_STRING_LITERAL);
118 buffer.extend_from_slice(&value_id.to_be_bytes());
119 encode_small_string(language, buffer);
120 }
121 EncodedTerm::BigBigLangStringLiteral {
122 value_id,
123 language_id,
124 } => {
125 buffer.push(TYPE_BIG_BIG_LANG_STRING_LITERAL);
126 buffer.extend_from_slice(&value_id.to_be_bytes());
127 buffer.extend_from_slice(&language_id.to_be_bytes());
128 }
129 EncodedTerm::SmallSmallTypedLiteral { value, datatype } => {
130 buffer.push(TYPE_SMALL_SMALL_TYPED_LITERAL);
131 encode_small_string(value, buffer);
132 encode_small_string(datatype, buffer);
133 }
134 EncodedTerm::SmallBigTypedLiteral { value, datatype_id } => {
135 buffer.push(TYPE_SMALL_BIG_TYPED_LITERAL);
136 encode_small_string(value, buffer);
137 buffer.extend_from_slice(&datatype_id.to_be_bytes());
138 }
139 EncodedTerm::BigSmallTypedLiteral { value_id, datatype } => {
140 buffer.push(TYPE_BIG_SMALL_TYPED_LITERAL);
141 buffer.extend_from_slice(&value_id.to_be_bytes());
142 encode_small_string(datatype, buffer);
143 }
144 EncodedTerm::BigBigTypedLiteral {
145 value_id,
146 datatype_id,
147 } => {
148 buffer.push(TYPE_BIG_BIG_TYPED_LITERAL);
149 buffer.extend_from_slice(&value_id.to_be_bytes());
150 buffer.extend_from_slice(&datatype_id.to_be_bytes());
151 }
152 EncodedTerm::QuotedTriple {
153 subject,
154 predicate,
155 object,
156 } => {
157 buffer.push(TYPE_QUOTED_TRIPLE);
158 encode_term(subject, buffer)?;
159 encode_term(predicate, buffer)?;
160 encode_term(object, buffer)?;
161 }
162 }
163 Ok(())
164}
165
166pub fn decode_term(buffer: &mut Cursor<&[u8]>) -> Result<EncodedTerm, OxirsError> {
168 let mut type_byte = [0u8; 1];
169 buffer
170 .read_exact(&mut type_byte)
171 .map_err(|e| OxirsError::Store(format!("Failed to read type byte: {e}")))?;
172
173 match type_byte[0] {
174 TYPE_DEFAULT_GRAPH => Ok(EncodedTerm::DefaultGraph),
175 TYPE_NAMED_NODE_ID => {
176 let iri_id = read_str_hash(buffer)?;
177 Ok(EncodedTerm::NamedNode { iri_id })
178 }
179 TYPE_NUMERICAL_BLANK_NODE_ID => {
180 let mut id = [0u8; 16];
181 buffer
182 .read_exact(&mut id)
183 .map_err(|e| OxirsError::Store(format!("Failed to read blank node ID: {e}")))?;
184 Ok(EncodedTerm::NumericalBlankNode { id })
185 }
186 TYPE_SMALL_BLANK_NODE_ID => {
187 let id = decode_small_string(buffer)?;
188 Ok(EncodedTerm::SmallBlankNode(id))
189 }
190 TYPE_BIG_BLANK_NODE_ID => {
191 let id_id = read_str_hash(buffer)?;
192 Ok(EncodedTerm::BigBlankNode { id_id })
193 }
194 TYPE_SMALL_STRING_LITERAL => {
195 let value = decode_small_string(buffer)?;
196 Ok(EncodedTerm::SmallStringLiteral(value))
197 }
198 TYPE_BIG_STRING_LITERAL => {
199 let value_id = read_str_hash(buffer)?;
200 Ok(EncodedTerm::BigStringLiteral { value_id })
201 }
202 TYPE_SMALL_SMALL_LANG_STRING_LITERAL => {
203 let value = decode_small_string(buffer)?;
204 let language = decode_small_string(buffer)?;
205 Ok(EncodedTerm::SmallSmallLangStringLiteral { value, language })
206 }
207 TYPE_SMALL_BIG_LANG_STRING_LITERAL => {
208 let value = decode_small_string(buffer)?;
209 let language_id = read_str_hash(buffer)?;
210 Ok(EncodedTerm::SmallBigLangStringLiteral { value, language_id })
211 }
212 TYPE_BIG_SMALL_LANG_STRING_LITERAL => {
213 let value_id = read_str_hash(buffer)?;
214 let language = decode_small_string(buffer)?;
215 Ok(EncodedTerm::BigSmallLangStringLiteral { value_id, language })
216 }
217 TYPE_BIG_BIG_LANG_STRING_LITERAL => {
218 let value_id = read_str_hash(buffer)?;
219 let language_id = read_str_hash(buffer)?;
220 Ok(EncodedTerm::BigBigLangStringLiteral {
221 value_id,
222 language_id,
223 })
224 }
225 TYPE_SMALL_SMALL_TYPED_LITERAL => {
226 let value = decode_small_string(buffer)?;
227 let datatype = decode_small_string(buffer)?;
228 Ok(EncodedTerm::SmallSmallTypedLiteral { value, datatype })
229 }
230 TYPE_SMALL_BIG_TYPED_LITERAL => {
231 let value = decode_small_string(buffer)?;
232 let datatype_id = read_str_hash(buffer)?;
233 Ok(EncodedTerm::SmallBigTypedLiteral { value, datatype_id })
234 }
235 TYPE_BIG_SMALL_TYPED_LITERAL => {
236 let value_id = read_str_hash(buffer)?;
237 let datatype = decode_small_string(buffer)?;
238 Ok(EncodedTerm::BigSmallTypedLiteral { value_id, datatype })
239 }
240 TYPE_BIG_BIG_TYPED_LITERAL => {
241 let value_id = read_str_hash(buffer)?;
242 let datatype_id = read_str_hash(buffer)?;
243 Ok(EncodedTerm::BigBigTypedLiteral {
244 value_id,
245 datatype_id,
246 })
247 }
248 TYPE_QUOTED_TRIPLE => {
249 let subject = Box::new(decode_term(buffer)?);
250 let predicate = Box::new(decode_term(buffer)?);
251 let object = Box::new(decode_term(buffer)?);
252 Ok(EncodedTerm::QuotedTriple {
253 subject,
254 predicate,
255 object,
256 })
257 }
258 type_byte => Err(OxirsError::Store(format!(
259 "Unknown encoded term type: {type_byte}"
260 ))),
261 }
262}
263
264fn encode_small_string(small_string: &SmallString, buffer: &mut Vec<u8>) {
266 buffer.push(small_string.len() as u8);
267 buffer.extend_from_slice(small_string.as_str().as_bytes());
268}
269
270fn decode_small_string(buffer: &mut Cursor<&[u8]>) -> Result<SmallString, OxirsError> {
272 let mut len_byte = [0u8; 1];
273 buffer
274 .read_exact(&mut len_byte)
275 .map_err(|e| OxirsError::Store(format!("Failed to read string length: {e}")))?;
276
277 let len = len_byte[0] as usize;
278 if len > 15 {
279 return Err(OxirsError::Store(format!(
280 "SmallString length {len} exceeds maximum of 15"
281 )));
282 }
283
284 let mut data = [0u8; 16];
285 if len > 0 {
286 buffer
287 .read_exact(&mut data[..len])
288 .map_err(|e| OxirsError::Store(format!("Failed to read string data: {e}")))?;
289 }
290
291 let s = std::str::from_utf8(&data[..len])
292 .map_err(|e| OxirsError::Store(format!("Invalid UTF-8 in small string: {e}")))?;
293
294 SmallString::new(s)
295 .ok_or_else(|| OxirsError::Store("String too long for SmallString".to_string()))
296}
297
298fn read_str_hash(buffer: &mut Cursor<&[u8]>) -> Result<StrHash, OxirsError> {
300 let mut hash_bytes = [0u8; 16];
301 buffer
302 .read_exact(&mut hash_bytes)
303 .map_err(|e| OxirsError::Store(format!("Failed to read StrHash: {e}")))?;
304 Ok(StrHash::from_be_bytes(hash_bytes))
305}
306
307fn encode_spog_quad(quad: &EncodedQuad, buffer: &mut Vec<u8>) -> Result<(), OxirsError> {
310 encode_term(&quad.subject, buffer)?;
311 encode_term(&quad.predicate, buffer)?;
312 encode_term(&quad.object, buffer)?;
313 encode_term(&quad.graph_name, buffer)
314}
315
316fn decode_spog_quad(cursor: &mut Cursor<&[u8]>) -> Result<EncodedQuad, OxirsError> {
317 let subject = decode_term(cursor)?;
318 let predicate = decode_term(cursor)?;
319 let object = decode_term(cursor)?;
320 let graph_name = decode_term(cursor)?;
321 Ok(EncodedQuad::new(subject, predicate, object, graph_name))
322}
323
324fn encode_posg_quad(quad: &EncodedQuad, buffer: &mut Vec<u8>) -> Result<(), OxirsError> {
325 encode_term(&quad.predicate, buffer)?;
326 encode_term(&quad.object, buffer)?;
327 encode_term(&quad.subject, buffer)?;
328 encode_term(&quad.graph_name, buffer)
329}
330
331fn decode_posg_quad(cursor: &mut Cursor<&[u8]>) -> Result<EncodedQuad, OxirsError> {
332 let predicate = decode_term(cursor)?;
333 let object = decode_term(cursor)?;
334 let subject = decode_term(cursor)?;
335 let graph_name = decode_term(cursor)?;
336 Ok(EncodedQuad::new(subject, predicate, object, graph_name))
337}
338
339fn encode_ospg_quad(quad: &EncodedQuad, buffer: &mut Vec<u8>) -> Result<(), OxirsError> {
340 encode_term(&quad.object, buffer)?;
341 encode_term(&quad.subject, buffer)?;
342 encode_term(&quad.predicate, buffer)?;
343 encode_term(&quad.graph_name, buffer)
344}
345
346fn decode_ospg_quad(cursor: &mut Cursor<&[u8]>) -> Result<EncodedQuad, OxirsError> {
347 let object = decode_term(cursor)?;
348 let subject = decode_term(cursor)?;
349 let predicate = decode_term(cursor)?;
350 let graph_name = decode_term(cursor)?;
351 Ok(EncodedQuad::new(subject, predicate, object, graph_name))
352}
353
354fn encode_gspo_quad(quad: &EncodedQuad, buffer: &mut Vec<u8>) -> Result<(), OxirsError> {
355 encode_term(&quad.graph_name, buffer)?;
356 encode_term(&quad.subject, buffer)?;
357 encode_term(&quad.predicate, buffer)?;
358 encode_term(&quad.object, buffer)
359}
360
361fn decode_gspo_quad(cursor: &mut Cursor<&[u8]>) -> Result<EncodedQuad, OxirsError> {
362 let graph_name = decode_term(cursor)?;
363 let subject = decode_term(cursor)?;
364 let predicate = decode_term(cursor)?;
365 let object = decode_term(cursor)?;
366 Ok(EncodedQuad::new(subject, predicate, object, graph_name))
367}
368
369fn encode_gpos_quad(quad: &EncodedQuad, buffer: &mut Vec<u8>) -> Result<(), OxirsError> {
370 encode_term(&quad.graph_name, buffer)?;
371 encode_term(&quad.predicate, buffer)?;
372 encode_term(&quad.object, buffer)?;
373 encode_term(&quad.subject, buffer)
374}
375
376fn decode_gpos_quad(cursor: &mut Cursor<&[u8]>) -> Result<EncodedQuad, OxirsError> {
377 let graph_name = decode_term(cursor)?;
378 let predicate = decode_term(cursor)?;
379 let object = decode_term(cursor)?;
380 let subject = decode_term(cursor)?;
381 Ok(EncodedQuad::new(subject, predicate, object, graph_name))
382}
383
384fn encode_gosp_quad(quad: &EncodedQuad, buffer: &mut Vec<u8>) -> Result<(), OxirsError> {
385 encode_term(&quad.graph_name, buffer)?;
386 encode_term(&quad.object, buffer)?;
387 encode_term(&quad.subject, buffer)?;
388 encode_term(&quad.predicate, buffer)
389}
390
391fn decode_gosp_quad(cursor: &mut Cursor<&[u8]>) -> Result<EncodedQuad, OxirsError> {
392 let graph_name = decode_term(cursor)?;
393 let object = decode_term(cursor)?;
394 let subject = decode_term(cursor)?;
395 let predicate = decode_term(cursor)?;
396 Ok(EncodedQuad::new(subject, predicate, object, graph_name))
397}
398
#[cfg(test)]
mod tests {
    use super::*;

    /// Each sample term must decode back to itself after encoding.
    #[test]
    fn test_term_encoding_roundtrip() {
        let samples = [
            EncodedTerm::DefaultGraph,
            EncodedTerm::NamedNode {
                iri_id: StrHash::new("http://example.org/test"),
            },
            EncodedTerm::SmallBlankNode(
                SmallString::new("test").expect("construction should succeed"),
            ),
            EncodedTerm::SmallStringLiteral(
                SmallString::new("hello").expect("construction should succeed"),
            ),
            EncodedTerm::SmallSmallLangStringLiteral {
                value: SmallString::new("hello").expect("construction should succeed"),
                language: SmallString::new("en").expect("construction should succeed"),
            },
        ];

        for original in &samples {
            let mut bytes = Vec::new();
            encode_term(original, &mut bytes).expect("term encoding should succeed");

            let mut reader = Cursor::new(bytes.as_slice());
            let roundtripped = decode_term(&mut reader).expect("term decoding should succeed");

            assert_eq!(*original, roundtripped);
        }
    }

    /// A quad must survive an encode/decode cycle under every term ordering.
    #[test]
    fn test_quad_encoding_roundtrip() {
        let subject = EncodedTerm::NamedNode {
            iri_id: StrHash::new("http://example.org/s"),
        };
        let predicate = EncodedTerm::NamedNode {
            iri_id: StrHash::new("http://example.org/p"),
        };
        let object = EncodedTerm::SmallStringLiteral(
            SmallString::new("object").expect("construction should succeed"),
        );
        let quad = EncodedQuad::new(subject, predicate, object, EncodedTerm::DefaultGraph);

        let orderings = [
            QuadEncoding::Spog,
            QuadEncoding::Posg,
            QuadEncoding::Ospg,
            QuadEncoding::Gspo,
            QuadEncoding::Gpos,
            QuadEncoding::Gosp,
        ];

        for ordering in orderings {
            let mut bytes = Vec::new();
            ordering
                .encode(&quad, &mut bytes)
                .expect("encoding should succeed");

            let roundtripped = ordering.decode(&bytes).expect("decoding should succeed");
            assert_eq!(quad, roundtripped);
        }
    }

    /// Small strings, including empty and multi-byte ones, must round-trip.
    #[test]
    fn test_small_string_encoding() {
        for text in ["", "test", "hello world", "emoji🚀"] {
            // Inputs that `SmallString::new` rejects are skipped, not failed.
            if let Some(original) = SmallString::new(text) {
                let mut bytes = Vec::new();
                encode_small_string(&original, &mut bytes);

                let mut reader = Cursor::new(bytes.as_slice());
                let roundtripped =
                    decode_small_string(&mut reader).expect("string decoding should succeed");

                assert_eq!(original.as_str(), roundtripped.as_str());
            }
        }
    }

    /// A hash rebuilt from its big-endian bytes must equal the original.
    #[test]
    fn test_str_hash_encoding() {
        let original = StrHash::new("http://example.org/test");
        let rebuilt = StrHash::from_be_bytes(original.to_be_bytes());
        assert_eq!(original, rebuilt);
    }
}