Skip to main content

oxirs_core/io/
zero_copy.rs

1//! Zero-copy serialization for RDF data
2//!
3//! This module provides efficient serialization/deserialization with minimal memory copies.
4
5use crate::model::{Term, Triple};
6use crate::OxirsError;
7use bytes::{Buf, BufMut, Bytes, BytesMut};
8use memmap2::{Mmap, MmapMut};
9use std::borrow::Cow;
10use std::fs::File;
11use std::io::{self, Write};
12use std::path::Path;
13use std::str;
14
15/// Trait for types that can be serialized with zero-copy
16pub trait ZeroCopySerialize {
17    /// Serialize to a writer
18    fn serialize_to<W: Write>(&self, writer: &mut W) -> io::Result<()>;
19
20    /// Get the serialized size in bytes
21    fn serialized_size(&self) -> usize;
22
23    /// Serialize to a byte buffer
24    fn serialize_to_bytes(&self, buf: &mut BytesMut);
25}
26
27/// Trait for types that can be deserialized with zero-copy
28pub trait ZeroCopyDeserialize<'a>: Sized {
29    /// Deserialize from a byte slice
30    fn deserialize_from(data: &'a [u8]) -> Result<(Self, &'a [u8]), OxirsError>;
31
32    /// Deserialize from a Bytes buffer
33    fn deserialize_from_bytes(buf: &mut Bytes) -> Result<Self, OxirsError>;
34}
35
/// String that is either borrowed from an input buffer or owned.
///
/// Thin public wrapper around [`Cow<str>`]; equality and hashing are those of
/// the wrapped `Cow`, i.e. they depend on the text only.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct ZeroCopyStr<'a>(pub Cow<'a, str>);

impl<'a> ZeroCopyStr<'a> {
    /// Wraps a string slice without copying it.
    pub fn new_borrowed(s: &'a str) -> Self {
        Self(Cow::Borrowed(s))
    }

    /// Wraps an already-owned `String`.
    pub fn new_owned(s: String) -> Self {
        Self(Cow::Owned(s))
    }

    /// Returns the text as a string slice.
    pub fn as_str(&self) -> &str {
        self.0.as_ref()
    }

    /// Converts into an owned `String`, copying only when the text was borrowed.
    pub fn into_owned(self) -> String {
        match self.0 {
            Cow::Borrowed(text) => text.to_owned(),
            Cow::Owned(text) => text,
        }
    }
}
57
58/// Zero-copy IRI representation
59#[derive(Debug, Clone)]
60pub struct ZeroCopyIri<'a> {
61    value: ZeroCopyStr<'a>,
62}
63
64impl<'a> ZeroCopyIri<'a> {
65    pub fn new(value: ZeroCopyStr<'a>) -> Self {
66        Self { value }
67    }
68
69    pub fn as_str(&self) -> &str {
70        self.value.as_str()
71    }
72}
73
74/// Zero-copy blank node
75#[derive(Debug, Clone)]
76pub struct ZeroCopyBlankNode<'a> {
77    id: ZeroCopyStr<'a>,
78}
79
80impl<'a> ZeroCopyBlankNode<'a> {
81    pub fn new(id: ZeroCopyStr<'a>) -> Self {
82        Self { id }
83    }
84
85    pub fn id(&self) -> &str {
86        self.id.as_str()
87    }
88}
89
90/// Zero-copy literal
91#[derive(Debug, Clone)]
92pub struct ZeroCopyLiteral<'a> {
93    value: ZeroCopyStr<'a>,
94    language: Option<ZeroCopyStr<'a>>,
95    datatype: Option<ZeroCopyIri<'a>>,
96}
97
98impl<'a> ZeroCopyLiteral<'a> {
99    pub fn new_simple(value: ZeroCopyStr<'a>) -> Self {
100        Self {
101            value,
102            language: None,
103            datatype: None,
104        }
105    }
106
107    pub fn new_language_tagged(value: ZeroCopyStr<'a>, language: ZeroCopyStr<'a>) -> Self {
108        Self {
109            value,
110            language: Some(language),
111            datatype: None,
112        }
113    }
114
115    pub fn new_typed(value: ZeroCopyStr<'a>, datatype: ZeroCopyIri<'a>) -> Self {
116        Self {
117            value,
118            language: None,
119            datatype: Some(datatype),
120        }
121    }
122
123    pub fn value(&self) -> &str {
124        self.value.as_str()
125    }
126
127    pub fn language(&self) -> Option<&str> {
128        self.language.as_ref().map(|l| l.as_str())
129    }
130
131    pub fn datatype(&self) -> Option<&ZeroCopyIri<'a>> {
132        self.datatype.as_ref()
133    }
134}
135
136/// Zero-copy term
137#[derive(Debug, Clone)]
138pub enum ZeroCopyTerm<'a> {
139    NamedNode(ZeroCopyIri<'a>),
140    BlankNode(ZeroCopyBlankNode<'a>),
141    Literal(ZeroCopyLiteral<'a>),
142    Variable(ZeroCopyStr<'a>),
143    QuotedTriple(Box<ZeroCopyTriple<'a>>),
144}
145
146/// Zero-copy triple
147#[derive(Debug, Clone)]
148pub struct ZeroCopyTriple<'a> {
149    pub subject: ZeroCopyTerm<'a>,
150    pub predicate: ZeroCopyIri<'a>,
151    pub object: ZeroCopyTerm<'a>,
152}
153
154/// Zero-copy quad
155#[derive(Debug, Clone)]
156pub struct ZeroCopyQuad<'a> {
157    pub subject: ZeroCopyTerm<'a>,
158    pub predicate: ZeroCopyIri<'a>,
159    pub object: ZeroCopyTerm<'a>,
160    pub graph: Option<ZeroCopyTerm<'a>>,
161}
162
// Binary format constants.
//
// Every encoded term starts with one of the `TERM_*` tag bytes below, followed
// by its length-prefixed strings (or, for a quoted triple, by three nested
// terms).

/// Version of the binary layout. Nothing in this module reads it yet, hence
/// the `dead_code` allowance.
#[allow(dead_code)]
const FORMAT_VERSION: u8 = 1;
/// Tag: IRI, followed by one string.
const TERM_NAMED_NODE: u8 = 0x00;
/// Tag: blank node, followed by its identifier string.
const TERM_BLANK_NODE: u8 = 0x01;
/// Tag: literal carrying only a value string.
const TERM_LITERAL_SIMPLE: u8 = 0x02;
/// Tag: literal carrying a value string and a language string.
const TERM_LITERAL_LANG: u8 = 0x03;
/// Tag: literal carrying a value string and a datatype IRI string.
const TERM_LITERAL_TYPED: u8 = 0x04;
/// Tag: variable, followed by its name string.
const TERM_VARIABLE: u8 = 0x05;
/// Tag: quoted triple, followed by subject, predicate and object terms.
const TERM_QUOTED_TRIPLE: u8 = 0x06;
173
/// Writes `s` as a length-prefixed string.
///
/// Layout: the UTF-8 byte length as a little-endian `u32`, followed by the
/// UTF-8 bytes themselves.
///
/// # Errors
///
/// * `io::ErrorKind::InvalidInput` if `s` is longer than `u32::MAX` bytes and
///   therefore cannot be described by the 4-byte prefix (a plain `as u32` cast
///   would silently truncate the length and corrupt the stream).
/// * Any error reported by `writer`.
fn write_string<W: Write>(writer: &mut W, s: &str) -> io::Result<()> {
    let bytes = s.as_bytes();
    let len = u32::try_from(bytes.len()).map_err(|_| {
        io::Error::new(
            io::ErrorKind::InvalidInput,
            "string too long for u32 length prefix",
        )
    })?;
    writer.write_all(&len.to_le_bytes())?;
    writer.write_all(bytes)
}
182
183/// Read a length-prefixed string (zero-copy)
184fn read_string(data: &[u8]) -> Result<(&str, &[u8]), OxirsError> {
185    if data.len() < 4 {
186        return Err(OxirsError::Parse(
187            "Insufficient data for string length".into(),
188        ));
189    }
190
191    let len = u32::from_le_bytes([data[0], data[1], data[2], data[3]]) as usize;
192    let data = &data[4..];
193
194    if data.len() < len {
195        return Err(OxirsError::Parse("Insufficient data for string".into()));
196    }
197
198    let s = str::from_utf8(&data[..len])
199        .map_err(|e| OxirsError::Parse(format!("Invalid UTF-8: {e}")))?;
200
201    Ok((s, &data[len..]))
202}
203
204impl ZeroCopySerialize for Term {
205    fn serialize_to<W: Write>(&self, writer: &mut W) -> io::Result<()> {
206        match self {
207            Term::NamedNode(n) => {
208                writer.write_all(&[TERM_NAMED_NODE])?;
209                write_string(writer, n.as_str())?;
210            }
211            Term::BlankNode(b) => {
212                writer.write_all(&[TERM_BLANK_NODE])?;
213                write_string(writer, b.as_str())?;
214            }
215            Term::Literal(l) => {
216                if let Some(lang) = l.language() {
217                    writer.write_all(&[TERM_LITERAL_LANG])?;
218                    write_string(writer, l.value())?;
219                    write_string(writer, lang)?;
220                } else if l.datatype().as_str() != "http://www.w3.org/2001/XMLSchema#string" {
221                    writer.write_all(&[TERM_LITERAL_TYPED])?;
222                    write_string(writer, l.value())?;
223                    write_string(writer, l.datatype().as_str())?;
224                } else {
225                    writer.write_all(&[TERM_LITERAL_SIMPLE])?;
226                    write_string(writer, l.value())?;
227                }
228            }
229            Term::Variable(v) => {
230                writer.write_all(&[TERM_VARIABLE])?;
231                write_string(writer, v.as_str())?;
232            }
233            Term::QuotedTriple(qt) => {
234                writer.write_all(&[TERM_QUOTED_TRIPLE])?;
235                // Convert and serialize the inner triple components
236                let subject_term = match qt.subject() {
237                    crate::model::Subject::NamedNode(n) => Term::NamedNode(n.clone()),
238                    crate::model::Subject::BlankNode(b) => Term::BlankNode(b.clone()),
239                    crate::model::Subject::Variable(v) => Term::Variable(v.clone()),
240                    crate::model::Subject::QuotedTriple(nested_qt) => {
241                        Term::QuotedTriple(nested_qt.clone())
242                    }
243                };
244                subject_term.serialize_to(writer)?;
245
246                let predicate_term = match qt.predicate() {
247                    crate::model::Predicate::NamedNode(n) => Term::NamedNode(n.clone()),
248                    crate::model::Predicate::Variable(v) => Term::Variable(v.clone()),
249                };
250                predicate_term.serialize_to(writer)?;
251
252                let object_term = match qt.object() {
253                    crate::model::Object::NamedNode(n) => Term::NamedNode(n.clone()),
254                    crate::model::Object::BlankNode(b) => Term::BlankNode(b.clone()),
255                    crate::model::Object::Literal(l) => Term::Literal(l.clone()),
256                    crate::model::Object::Variable(v) => Term::Variable(v.clone()),
257                    crate::model::Object::QuotedTriple(nested_qt) => {
258                        Term::QuotedTriple(nested_qt.clone())
259                    }
260                };
261                object_term.serialize_to(writer)?;
262            }
263        }
264        Ok(())
265    }
266
267    fn serialized_size(&self) -> usize {
268        1 + match self {
269            Term::NamedNode(n) => 4 + n.as_str().len(),
270            Term::BlankNode(b) => 4 + b.as_str().len(),
271            Term::Literal(l) => {
272                if let Some(lang) = l.language() {
273                    4 + l.value().len() + 4 + lang.len()
274                } else if l.datatype().as_str() != "http://www.w3.org/2001/XMLSchema#string" {
275                    4 + l.value().len() + 4 + l.datatype().as_str().len()
276                } else {
277                    4 + l.value().len()
278                }
279            }
280            Term::Variable(v) => 4 + v.as_str().len(),
281            Term::QuotedTriple(qt) => {
282                // Convert and calculate sizes for the inner triple components
283                let subject_term = match qt.subject() {
284                    crate::model::Subject::NamedNode(n) => Term::NamedNode(n.clone()),
285                    crate::model::Subject::BlankNode(b) => Term::BlankNode(b.clone()),
286                    crate::model::Subject::Variable(v) => Term::Variable(v.clone()),
287                    crate::model::Subject::QuotedTriple(nested_qt) => {
288                        Term::QuotedTriple(nested_qt.clone())
289                    }
290                };
291
292                let predicate_term = match qt.predicate() {
293                    crate::model::Predicate::NamedNode(n) => Term::NamedNode(n.clone()),
294                    crate::model::Predicate::Variable(v) => Term::Variable(v.clone()),
295                };
296
297                let object_term = match qt.object() {
298                    crate::model::Object::NamedNode(n) => Term::NamedNode(n.clone()),
299                    crate::model::Object::BlankNode(b) => Term::BlankNode(b.clone()),
300                    crate::model::Object::Literal(l) => Term::Literal(l.clone()),
301                    crate::model::Object::Variable(v) => Term::Variable(v.clone()),
302                    crate::model::Object::QuotedTriple(nested_qt) => {
303                        Term::QuotedTriple(nested_qt.clone())
304                    }
305                };
306
307                subject_term.serialized_size()
308                    + predicate_term.serialized_size()
309                    + object_term.serialized_size()
310            }
311        }
312    }
313
314    fn serialize_to_bytes(&self, buf: &mut BytesMut) {
315        match *self {
316            Term::NamedNode(ref n) => {
317                buf.put_u8(TERM_NAMED_NODE);
318                buf.put_u32_le(n.as_str().len() as u32);
319                buf.put_slice(n.as_str().as_bytes());
320            }
321            Term::BlankNode(ref b) => {
322                buf.put_u8(TERM_BLANK_NODE);
323                buf.put_u32_le(b.as_str().len() as u32);
324                buf.put_slice(b.as_str().as_bytes());
325            }
326            Term::Literal(ref l) => {
327                if let Some(lang) = l.language() {
328                    buf.put_u8(TERM_LITERAL_LANG);
329                    buf.put_u32_le(l.value().len() as u32);
330                    buf.put_slice(l.value().as_bytes());
331                    buf.put_u32_le(lang.len() as u32);
332                    buf.put_slice(lang.as_bytes());
333                } else if l.datatype().as_str() != "http://www.w3.org/2001/XMLSchema#string" {
334                    buf.put_u8(TERM_LITERAL_TYPED);
335                    buf.put_u32_le(l.value().len() as u32);
336                    buf.put_slice(l.value().as_bytes());
337                    buf.put_u32_le(l.datatype().as_str().len() as u32);
338                    buf.put_slice(l.datatype().as_str().as_bytes());
339                } else {
340                    buf.put_u8(TERM_LITERAL_SIMPLE);
341                    buf.put_u32_le(l.value().len() as u32);
342                    buf.put_slice(l.value().as_bytes());
343                }
344            }
345            Term::Variable(ref v) => {
346                buf.put_u8(TERM_VARIABLE);
347                buf.put_u32_le(v.as_str().len() as u32);
348                buf.put_slice(v.as_str().as_bytes());
349            }
350            Term::QuotedTriple(ref qt) => {
351                buf.put_u8(TERM_QUOTED_TRIPLE);
352                // Convert and serialize the inner triple components
353                let subject_term = match qt.subject() {
354                    crate::model::Subject::NamedNode(n) => Term::NamedNode(n.clone()),
355                    crate::model::Subject::BlankNode(b) => Term::BlankNode(b.clone()),
356                    crate::model::Subject::Variable(v) => Term::Variable(v.clone()),
357                    crate::model::Subject::QuotedTriple(nested_qt) => {
358                        Term::QuotedTriple(nested_qt.clone())
359                    }
360                };
361                subject_term.serialize_to_bytes(buf);
362
363                let predicate_term = match qt.predicate() {
364                    crate::model::Predicate::NamedNode(n) => Term::NamedNode(n.clone()),
365                    crate::model::Predicate::Variable(v) => Term::Variable(v.clone()),
366                };
367                predicate_term.serialize_to_bytes(buf);
368
369                let object_term = match qt.object() {
370                    crate::model::Object::NamedNode(n) => Term::NamedNode(n.clone()),
371                    crate::model::Object::BlankNode(b) => Term::BlankNode(b.clone()),
372                    crate::model::Object::Literal(l) => Term::Literal(l.clone()),
373                    crate::model::Object::Variable(v) => Term::Variable(v.clone()),
374                    crate::model::Object::QuotedTriple(nested_qt) => {
375                        Term::QuotedTriple(nested_qt.clone())
376                    }
377                };
378                object_term.serialize_to_bytes(buf);
379            }
380        }
381    }
382}
383
384impl<'a> ZeroCopyDeserialize<'a> for ZeroCopyTerm<'a> {
385    fn deserialize_from(data: &'a [u8]) -> Result<(Self, &'a [u8]), OxirsError> {
386        if data.is_empty() {
387            return Err(OxirsError::Parse("No data for term type".into()));
388        }
389
390        let term_type = data[0];
391        let data = &data[1..];
392
393        match term_type {
394            TERM_NAMED_NODE => {
395                let (iri, rest) = read_string(data)?;
396                Ok((
397                    ZeroCopyTerm::NamedNode(ZeroCopyIri::new(ZeroCopyStr::new_borrowed(iri))),
398                    rest,
399                ))
400            }
401            TERM_BLANK_NODE => {
402                let (id, rest) = read_string(data)?;
403                Ok((
404                    ZeroCopyTerm::BlankNode(ZeroCopyBlankNode::new(ZeroCopyStr::new_borrowed(id))),
405                    rest,
406                ))
407            }
408            TERM_LITERAL_SIMPLE => {
409                let (value, rest) = read_string(data)?;
410                Ok((
411                    ZeroCopyTerm::Literal(ZeroCopyLiteral::new_simple(ZeroCopyStr::new_borrowed(
412                        value,
413                    ))),
414                    rest,
415                ))
416            }
417            TERM_LITERAL_LANG => {
418                let (value, data) = read_string(data)?;
419                let (lang, rest) = read_string(data)?;
420                Ok((
421                    ZeroCopyTerm::Literal(ZeroCopyLiteral::new_language_tagged(
422                        ZeroCopyStr::new_borrowed(value),
423                        ZeroCopyStr::new_borrowed(lang),
424                    )),
425                    rest,
426                ))
427            }
428            TERM_LITERAL_TYPED => {
429                let (value, data) = read_string(data)?;
430                let (datatype, rest) = read_string(data)?;
431                Ok((
432                    ZeroCopyTerm::Literal(ZeroCopyLiteral::new_typed(
433                        ZeroCopyStr::new_borrowed(value),
434                        ZeroCopyIri::new(ZeroCopyStr::new_borrowed(datatype)),
435                    )),
436                    rest,
437                ))
438            }
439            TERM_VARIABLE => {
440                let (name, rest) = read_string(data)?;
441                Ok((
442                    ZeroCopyTerm::Variable(ZeroCopyStr::new_borrowed(name)),
443                    rest,
444                ))
445            }
446            TERM_QUOTED_TRIPLE => {
447                // Deserialize the inner triple components
448                let (subject, data) = ZeroCopyTerm::deserialize_from(data)?;
449                let (predicate_term, data) = ZeroCopyTerm::deserialize_from(data)?;
450                let (object, rest) = ZeroCopyTerm::deserialize_from(data)?;
451
452                // Ensure predicate is a named node
453                let predicate = match predicate_term {
454                    ZeroCopyTerm::NamedNode(iri) => iri,
455                    _ => return Err(OxirsError::Parse("Predicate must be a named node".into())),
456                };
457
458                let triple = ZeroCopyTriple {
459                    subject,
460                    predicate,
461                    object,
462                };
463
464                Ok((ZeroCopyTerm::QuotedTriple(Box::new(triple)), rest))
465            }
466            _ => Err(OxirsError::Parse(format!("Unknown term type: {term_type}"))),
467        }
468    }
469
470    fn deserialize_from_bytes(buf: &mut Bytes) -> Result<Self, OxirsError> {
471        if buf.remaining() == 0 {
472            return Err(OxirsError::Parse("No data for term type".into()));
473        }
474
475        let term_type = buf.get_u8();
476
477        match term_type {
478            TERM_NAMED_NODE => {
479                let len = buf.get_u32_le() as usize;
480                let bytes = buf.split_to(len);
481                let iri = str::from_utf8(&bytes)
482                    .map_err(|e| OxirsError::Parse(format!("Invalid UTF-8: {e}")))?;
483                Ok(ZeroCopyTerm::NamedNode(ZeroCopyIri::new(
484                    ZeroCopyStr::new_owned(iri.to_string()),
485                )))
486            }
487            TERM_BLANK_NODE => {
488                let len = buf.get_u32_le() as usize;
489                let bytes = buf.split_to(len);
490                let id = str::from_utf8(&bytes)
491                    .map_err(|e| OxirsError::Parse(format!("Invalid UTF-8: {e}")))?;
492                Ok(ZeroCopyTerm::BlankNode(ZeroCopyBlankNode::new(
493                    ZeroCopyStr::new_owned(id.to_string()),
494                )))
495            }
496            TERM_LITERAL_SIMPLE => {
497                let len = buf.get_u32_le() as usize;
498                let bytes = buf.split_to(len);
499                let value = str::from_utf8(&bytes)
500                    .map_err(|e| OxirsError::Parse(format!("Invalid UTF-8: {e}")))?;
501                Ok(ZeroCopyTerm::Literal(ZeroCopyLiteral::new_simple(
502                    ZeroCopyStr::new_owned(value.to_string()),
503                )))
504            }
505            TERM_QUOTED_TRIPLE => {
506                // Deserialize the inner triple components
507                let subject = ZeroCopyTerm::deserialize_from_bytes(buf)?;
508                let predicate_term = ZeroCopyTerm::deserialize_from_bytes(buf)?;
509                let object = ZeroCopyTerm::deserialize_from_bytes(buf)?;
510
511                // Ensure predicate is a named node
512                let predicate = match predicate_term {
513                    ZeroCopyTerm::NamedNode(iri) => iri,
514                    _ => return Err(OxirsError::Parse("Predicate must be a named node".into())),
515                };
516
517                let triple = ZeroCopyTriple {
518                    subject,
519                    predicate,
520                    object,
521                };
522
523                Ok(ZeroCopyTerm::QuotedTriple(Box::new(triple)))
524            }
525            _ => Err(OxirsError::Parse(format!("Unknown term type: {term_type}"))),
526        }
527    }
528}
529
530impl ZeroCopySerialize for Triple {
531    fn serialize_to<W: Write>(&self, writer: &mut W) -> io::Result<()> {
532        // Convert Subject to Term
533        let subject_term = match self.subject() {
534            crate::model::Subject::NamedNode(n) => Term::NamedNode(n.clone()),
535            crate::model::Subject::BlankNode(b) => Term::BlankNode(b.clone()),
536            crate::model::Subject::Variable(v) => Term::Variable(v.clone()),
537            crate::model::Subject::QuotedTriple(qt) => Term::QuotedTriple(qt.clone()),
538        };
539        subject_term.serialize_to(writer)?;
540
541        // Convert Predicate to Term
542        let predicate_term = match self.predicate() {
543            crate::model::Predicate::NamedNode(n) => Term::NamedNode(n.clone()),
544            crate::model::Predicate::Variable(v) => Term::Variable(v.clone()),
545        };
546        predicate_term.serialize_to(writer)?;
547
548        // Convert Object to Term
549        let object_term = match self.object() {
550            crate::model::Object::NamedNode(n) => Term::NamedNode(n.clone()),
551            crate::model::Object::BlankNode(b) => Term::BlankNode(b.clone()),
552            crate::model::Object::Literal(l) => Term::Literal(l.clone()),
553            crate::model::Object::Variable(v) => Term::Variable(v.clone()),
554            crate::model::Object::QuotedTriple(qt) => Term::QuotedTriple(qt.clone()),
555        };
556        object_term.serialize_to(writer)?;
557
558        Ok(())
559    }
560
561    fn serialized_size(&self) -> usize {
562        // Convert to Terms and use their serialized_size
563        let subject_term: Term = match self.subject() {
564            crate::model::Subject::NamedNode(n) => Term::NamedNode(n.clone()),
565            crate::model::Subject::BlankNode(b) => Term::BlankNode(b.clone()),
566            crate::model::Subject::Variable(v) => Term::Variable(v.clone()),
567            crate::model::Subject::QuotedTriple(qt) => Term::QuotedTriple(qt.clone()),
568        };
569
570        let predicate_term: Term = match self.predicate() {
571            crate::model::Predicate::NamedNode(n) => Term::NamedNode(n.clone()),
572            crate::model::Predicate::Variable(v) => Term::Variable(v.clone()),
573        };
574
575        let object_term: Term = match self.object() {
576            crate::model::Object::NamedNode(n) => Term::NamedNode(n.clone()),
577            crate::model::Object::BlankNode(b) => Term::BlankNode(b.clone()),
578            crate::model::Object::Literal(l) => Term::Literal(l.clone()),
579            crate::model::Object::Variable(v) => Term::Variable(v.clone()),
580            crate::model::Object::QuotedTriple(qt) => Term::QuotedTriple(qt.clone()),
581        };
582
583        subject_term.serialized_size()
584            + predicate_term.serialized_size()
585            + object_term.serialized_size()
586    }
587
588    fn serialize_to_bytes(&self, buf: &mut BytesMut) {
589        // Convert and serialize subject
590        let subject_term: Term = match self.subject() {
591            crate::model::Subject::NamedNode(n) => Term::NamedNode(n.clone()),
592            crate::model::Subject::BlankNode(b) => Term::BlankNode(b.clone()),
593            crate::model::Subject::Variable(v) => Term::Variable(v.clone()),
594            crate::model::Subject::QuotedTriple(qt) => Term::QuotedTriple(qt.clone()),
595        };
596        subject_term.serialize_to_bytes(buf);
597
598        // Convert and serialize predicate
599        let predicate_term: Term = match self.predicate() {
600            crate::model::Predicate::NamedNode(n) => Term::NamedNode(n.clone()),
601            crate::model::Predicate::Variable(v) => Term::Variable(v.clone()),
602        };
603        predicate_term.serialize_to_bytes(buf);
604
605        // Convert and serialize object
606        let object_term: Term = match self.object() {
607            crate::model::Object::NamedNode(n) => Term::NamedNode(n.clone()),
608            crate::model::Object::BlankNode(b) => Term::BlankNode(b.clone()),
609            crate::model::Object::Literal(l) => Term::Literal(l.clone()),
610            crate::model::Object::Variable(v) => Term::Variable(v.clone()),
611            crate::model::Object::QuotedTriple(qt) => Term::QuotedTriple(qt.clone()),
612        };
613        object_term.serialize_to_bytes(buf);
614    }
615}
616
617impl<'a> ZeroCopyDeserialize<'a> for ZeroCopyTriple<'a> {
618    fn deserialize_from(data: &'a [u8]) -> Result<(Self, &'a [u8]), OxirsError> {
619        let (subject, data) = ZeroCopyTerm::deserialize_from(data)?;
620        let (predicate, data) = ZeroCopyTerm::deserialize_from(data)?;
621        let (object, data) = ZeroCopyTerm::deserialize_from(data)?;
622
623        let predicate_iri = match predicate {
624            ZeroCopyTerm::NamedNode(iri) => iri,
625            _ => return Err(OxirsError::Parse("Predicate must be IRI".into())),
626        };
627
628        Ok((
629            ZeroCopyTriple {
630                subject,
631                predicate: predicate_iri,
632                object,
633            },
634            data,
635        ))
636    }
637
638    fn deserialize_from_bytes(buf: &mut Bytes) -> Result<Self, OxirsError> {
639        let subject = ZeroCopyTerm::deserialize_from_bytes(buf)?;
640        let predicate = ZeroCopyTerm::deserialize_from_bytes(buf)?;
641        let object = ZeroCopyTerm::deserialize_from_bytes(buf)?;
642
643        let predicate_iri = match predicate {
644            ZeroCopyTerm::NamedNode(iri) => iri,
645            _ => return Err(OxirsError::Parse("Predicate must be IRI".into())),
646        };
647
648        Ok(ZeroCopyTriple {
649            subject,
650            predicate: predicate_iri,
651            object,
652        })
653    }
654}
655
656/// Memory-mapped file for zero-copy reading
657pub struct MmapReader {
658    _file: File,
659    mmap: Mmap,
660}
661
662impl MmapReader {
663    pub fn new<P: AsRef<Path>>(path: P) -> io::Result<Self> {
664        let file = File::open(path)?;
665        let mmap = unsafe { Mmap::map(&file)? };
666        Ok(Self { _file: file, mmap })
667    }
668
669    pub fn data(&self) -> &[u8] {
670        &self.mmap
671    }
672
673    /// Iterate over triples in the file with zero-copy
674    pub fn iter_triples(&self) -> ZeroCopyTripleIterator<'_> {
675        ZeroCopyTripleIterator {
676            data: self.data(),
677            offset: 0,
678        }
679    }
680}
681
682/// Iterator over zero-copy triples
683pub struct ZeroCopyTripleIterator<'a> {
684    data: &'a [u8],
685    offset: usize,
686}
687
688impl<'a> Iterator for ZeroCopyTripleIterator<'a> {
689    type Item = Result<ZeroCopyTriple<'a>, OxirsError>;
690
691    fn next(&mut self) -> Option<Self::Item> {
692        if self.offset >= self.data.len() {
693            return None;
694        }
695
696        match ZeroCopyTriple::deserialize_from(&self.data[self.offset..]) {
697            Ok((triple, rest)) => {
698                self.offset = self.data.len() - rest.len();
699                Some(Ok(triple))
700            }
701            Err(e) => Some(Err(e)),
702        }
703    }
704}
705
706/// Memory-mapped file for zero-copy writing
707pub struct MmapWriter {
708    file: File,
709    mmap: MmapMut,
710    position: usize,
711}
712
713impl MmapWriter {
714    pub fn new<P: AsRef<Path>>(path: P, capacity: usize) -> io::Result<Self> {
715        let file = File::create(path)?;
716        file.set_len(capacity as u64)?;
717        let mmap = unsafe { MmapMut::map_mut(&file)? };
718        Ok(Self {
719            file,
720            mmap,
721            position: 0,
722        })
723    }
724
725    pub fn write_triple(&mut self, triple: &Triple) -> io::Result<()> {
726        let size = triple.serialized_size();
727        if self.position + size > self.mmap.len() {
728            return Err(io::Error::new(
729                io::ErrorKind::WriteZero,
730                "MmapWriter capacity exceeded",
731            ));
732        }
733
734        let mut cursor = io::Cursor::new(&mut self.mmap[self.position..]);
735        triple.serialize_to(&mut cursor)?;
736        self.position += size;
737        Ok(())
738    }
739
740    pub fn finalize(self) -> io::Result<()> {
741        // Truncate file to actual size
742        self.file.set_len(self.position as u64)?;
743        self.mmap.flush()?;
744        Ok(())
745    }
746}
747
748// Implement ZeroCopySerialize for Subject, Predicate, and Object
749impl ZeroCopySerialize for crate::model::Subject {
750    fn serialize_to<W: Write>(&self, writer: &mut W) -> io::Result<()> {
751        let term: Term = match self {
752            crate::model::Subject::NamedNode(n) => Term::NamedNode(n.clone()),
753            crate::model::Subject::BlankNode(b) => Term::BlankNode(b.clone()),
754            crate::model::Subject::Variable(v) => Term::Variable(v.clone()),
755            crate::model::Subject::QuotedTriple(qt) => Term::QuotedTriple(qt.clone()),
756        };
757        term.serialize_to(writer)
758    }
759
760    fn serialized_size(&self) -> usize {
761        let term: Term = match self {
762            crate::model::Subject::NamedNode(n) => Term::NamedNode(n.clone()),
763            crate::model::Subject::BlankNode(b) => Term::BlankNode(b.clone()),
764            crate::model::Subject::Variable(v) => Term::Variable(v.clone()),
765            crate::model::Subject::QuotedTriple(qt) => Term::QuotedTriple(qt.clone()),
766        };
767        term.serialized_size()
768    }
769
770    fn serialize_to_bytes(&self, buf: &mut BytesMut) {
771        let term: Term = match self {
772            crate::model::Subject::NamedNode(n) => Term::NamedNode(n.clone()),
773            crate::model::Subject::BlankNode(b) => Term::BlankNode(b.clone()),
774            crate::model::Subject::Variable(v) => Term::Variable(v.clone()),
775            crate::model::Subject::QuotedTriple(qt) => Term::QuotedTriple(qt.clone()),
776        };
777        term.serialize_to_bytes(buf)
778    }
779}
780
781impl ZeroCopySerialize for crate::model::Predicate {
782    fn serialize_to<W: Write>(&self, writer: &mut W) -> io::Result<()> {
783        let term: Term = match self {
784            crate::model::Predicate::NamedNode(n) => Term::NamedNode(n.clone()),
785            crate::model::Predicate::Variable(v) => Term::Variable(v.clone()),
786        };
787        term.serialize_to(writer)
788    }
789
790    fn serialized_size(&self) -> usize {
791        let term: Term = match self {
792            crate::model::Predicate::NamedNode(n) => Term::NamedNode(n.clone()),
793            crate::model::Predicate::Variable(v) => Term::Variable(v.clone()),
794        };
795        term.serialized_size()
796    }
797
798    fn serialize_to_bytes(&self, buf: &mut BytesMut) {
799        let term: Term = match self {
800            crate::model::Predicate::NamedNode(n) => Term::NamedNode(n.clone()),
801            crate::model::Predicate::Variable(v) => Term::Variable(v.clone()),
802        };
803        term.serialize_to_bytes(buf)
804    }
805}
806
807impl ZeroCopySerialize for crate::model::Object {
808    fn serialize_to<W: Write>(&self, writer: &mut W) -> io::Result<()> {
809        let term: Term = match self {
810            crate::model::Object::NamedNode(n) => Term::NamedNode(n.clone()),
811            crate::model::Object::BlankNode(b) => Term::BlankNode(b.clone()),
812            crate::model::Object::Literal(l) => Term::Literal(l.clone()),
813            crate::model::Object::Variable(v) => Term::Variable(v.clone()),
814            crate::model::Object::QuotedTriple(qt) => Term::QuotedTriple(qt.clone()),
815        };
816        term.serialize_to(writer)
817    }
818
819    fn serialized_size(&self) -> usize {
820        let term: Term = match self {
821            crate::model::Object::NamedNode(n) => Term::NamedNode(n.clone()),
822            crate::model::Object::BlankNode(b) => Term::BlankNode(b.clone()),
823            crate::model::Object::Literal(l) => Term::Literal(l.clone()),
824            crate::model::Object::Variable(v) => Term::Variable(v.clone()),
825            crate::model::Object::QuotedTriple(qt) => Term::QuotedTriple(qt.clone()),
826        };
827        term.serialized_size()
828    }
829
830    fn serialize_to_bytes(&self, buf: &mut BytesMut) {
831        let term: Term = match self {
832            crate::model::Object::NamedNode(n) => Term::NamedNode(n.clone()),
833            crate::model::Object::BlankNode(b) => Term::BlankNode(b.clone()),
834            crate::model::Object::Literal(l) => Term::Literal(l.clone()),
835            crate::model::Object::Variable(v) => Term::Variable(v.clone()),
836            crate::model::Object::QuotedTriple(qt) => Term::QuotedTriple(qt.clone()),
837        };
838        term.serialize_to_bytes(buf)
839    }
840}
841
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{Literal, NamedNode};

    /// A named node written to a byte vector reads back as a
    /// `ZeroCopyTerm::NamedNode` with the same IRI and no trailing bytes.
    #[test]
    fn test_term_serialization() {
        let node = NamedNode::new("http://example.org/test").unwrap();
        let term = Term::NamedNode(node);

        let mut encoded = Vec::new();
        term.serialize_to(&mut encoded).unwrap();

        let (decoded, remaining) = ZeroCopyTerm::deserialize_from(&encoded).unwrap();
        assert!(remaining.is_empty());

        if let ZeroCopyTerm::NamedNode(iri) = decoded {
            assert_eq!(iri.as_str(), "http://example.org/test");
        } else {
            panic!("Wrong term type");
        }
    }

    /// A full triple round-trips through the writer-based serializer: subject,
    /// predicate and object all come back with their original values.
    #[test]
    fn test_triple_serialization() {
        let subject = NamedNode::new("http://example.org/subject").unwrap();
        let predicate = NamedNode::new("http://example.org/predicate").unwrap();
        let object = Literal::new("Object");
        let triple = Triple::new(subject, predicate, object);

        let mut encoded = Vec::new();
        triple.serialize_to(&mut encoded).unwrap();

        let (decoded, remaining) = ZeroCopyTriple::deserialize_from(&encoded).unwrap();
        assert!(remaining.is_empty());

        if let ZeroCopyTerm::NamedNode(iri) = &decoded.subject {
            assert_eq!(iri.as_str(), "http://example.org/subject");
        } else {
            panic!("Wrong subject type");
        }

        assert_eq!(decoded.predicate.as_str(), "http://example.org/predicate");

        if let ZeroCopyTerm::Literal(lit) = &decoded.object {
            assert_eq!(lit.value(), "Object");
        } else {
            panic!("Wrong object type");
        }
    }

    /// A literal round-trips through the `BytesMut` / `Bytes` code path.
    #[test]
    fn test_bytes_serialization() {
        let term = Term::Literal(Literal::new("Hello, World!"));

        // Size the buffer from the term's own reported serialized size.
        let capacity = term.serialized_size();
        let mut scratch = BytesMut::with_capacity(capacity);
        term.serialize_to_bytes(&mut scratch);

        let mut frozen = scratch.freeze();
        let decoded = ZeroCopyTerm::deserialize_from_bytes(&mut frozen).unwrap();

        if let ZeroCopyTerm::Literal(lit) = decoded {
            assert_eq!(lit.value(), "Hello, World!");
        } else {
            panic!("Wrong term type");
        }
    }
}