Skip to main content

oxirs_core/format/
trig.rs

1//! TriG format serializer and parser
2//!
3//! TriG extends Turtle with support for named graphs, allowing multiple RDF graphs
4//! to be serialized in a single document with graph-level organization.
5//!
6//! W3C Specification: <https://www.w3.org/TR/trig/>
7
8use super::error::FormatError;
9use crate::model::{
10    GraphName, Literal, NamedNode, ObjectRef, PredicateRef, Quad, QuadRef, SubjectRef,
11};
12use std::collections::{BTreeMap, HashMap};
13use std::io::Write;
14
/// Configurable serializer that writes RDF quads as a TriG document.
///
/// Options are applied builder-style; the serializer itself holds no output state.
#[derive(Debug, Clone)]
pub struct TriGSerializer {
    /// Base IRI supplied through `with_base_iri`, if any
    base_iri: Option<String>,
    /// Prefix name -> namespace IRI table used for `@prefix` lines and IRI abbreviation
    prefixes: HashMap<String, String>,
    /// When set, triples inside graph blocks are indented and a blank line follows each graph
    pretty: bool,
}
25
26impl TriGSerializer {
27    /// Create a new TriG serializer
28    pub fn new() -> Self {
29        let mut prefixes = HashMap::new();
30
31        // Add common prefixes
32        prefixes.insert(
33            "rdf".to_string(),
34            "http://www.w3.org/1999/02/22-rdf-syntax-ns#".to_string(),
35        );
36        prefixes.insert(
37            "rdfs".to_string(),
38            "http://www.w3.org/2000/01/rdf-schema#".to_string(),
39        );
40        prefixes.insert(
41            "xsd".to_string(),
42            "http://www.w3.org/2001/XMLSchema#".to_string(),
43        );
44
45        Self {
46            base_iri: None,
47            prefixes,
48            pretty: false,
49        }
50    }
51
52    /// Set the base IRI
53    pub fn with_base_iri(mut self, base: &str) -> Self {
54        self.base_iri = Some(base.to_string());
55        self
56    }
57
58    /// Add a prefix mapping
59    pub fn with_prefix(mut self, prefix: &str, iri: &str) -> Self {
60        self.prefixes.insert(prefix.to_string(), iri.to_string());
61        self
62    }
63
64    /// Enable pretty printing
65    pub fn pretty(mut self) -> Self {
66        self.pretty = true;
67        self
68    }
69
70    /// Wrap this serializer for a specific writer
71    pub fn for_writer<W: Write + 'static>(self, writer: W) -> TriGWriter<W> {
72        TriGWriter {
73            writer,
74            serializer: self,
75            buffer: Vec::new(),
76        }
77    }
78
79    /// Serialize quads grouped by graph
80    fn serialize_quads<W: Write>(&self, quads: &[Quad], writer: &mut W) -> Result<(), FormatError> {
81        // Write prefix declarations
82        for (prefix, namespace) in &self.prefixes {
83            writeln!(writer, "@prefix {}: <{}> .", prefix, namespace).map_err(FormatError::from)?;
84        }
85
86        if !self.prefixes.is_empty() {
87            writeln!(writer).map_err(FormatError::from)?;
88        }
89
90        // Group quads by graph
91        let grouped = self.group_quads_by_graph(quads);
92
93        for (graph_name, graph_quads) in grouped {
94            match graph_name {
95                GraphName::DefaultGraph => {
96                    // Serialize default graph triples directly
97                    for quad in graph_quads {
98                        self.serialize_triple(quad.as_ref(), writer)?;
99                        writeln!(writer, " .").map_err(FormatError::from)?;
100                    }
101                }
102                GraphName::NamedNode(node) => {
103                    // Named graph
104                    self.write_named_node(&node, writer)?;
105                    writeln!(writer, " {{").map_err(FormatError::from)?;
106
107                    for quad in graph_quads {
108                        if self.pretty {
109                            write!(writer, "    ").map_err(FormatError::from)?;
110                        }
111                        self.serialize_triple(quad.as_ref(), writer)?;
112                        writeln!(writer, " .").map_err(FormatError::from)?;
113                    }
114
115                    writeln!(writer, "}}").map_err(FormatError::from)?;
116                }
117                GraphName::BlankNode(node) => {
118                    // Blank node graph
119                    let id = node.as_str();
120                    let id = id.strip_prefix("_:").unwrap_or(id);
121                    writeln!(writer, "_:{} {{", id).map_err(FormatError::from)?;
122
123                    for quad in graph_quads {
124                        if self.pretty {
125                            write!(writer, "    ").map_err(FormatError::from)?;
126                        }
127                        self.serialize_triple(quad.as_ref(), writer)?;
128                        writeln!(writer, " .").map_err(FormatError::from)?;
129                    }
130
131                    writeln!(writer, "}}").map_err(FormatError::from)?;
132                }
133                GraphName::Variable(_) => {
134                    return Err(FormatError::InvalidData(
135                        "Cannot serialize variable graph names".to_string(),
136                    ));
137                }
138            }
139
140            if self.pretty {
141                writeln!(writer).map_err(FormatError::from)?;
142            }
143        }
144
145        Ok(())
146    }
147
148    fn serialize_triple<W: Write>(
149        &self,
150        quad: QuadRef<'_>,
151        writer: &mut W,
152    ) -> Result<(), FormatError> {
153        self.write_subject(quad.subject(), writer)?;
154        write!(writer, " ").map_err(FormatError::from)?;
155
156        self.write_predicate(quad.predicate(), writer)?;
157        write!(writer, " ").map_err(FormatError::from)?;
158
159        self.write_object(quad.object(), writer)?;
160
161        Ok(())
162    }
163
164    fn write_subject<W: Write>(
165        &self,
166        subject: SubjectRef<'_>,
167        writer: &mut W,
168    ) -> Result<(), FormatError> {
169        match subject {
170            SubjectRef::NamedNode(node) => self.write_named_node(node, writer)?,
171            SubjectRef::BlankNode(node) => {
172                let id = node.as_str();
173                let id = id.strip_prefix("_:").unwrap_or(id);
174                write!(writer, "_:{}", id).map_err(FormatError::from)?;
175            }
176            SubjectRef::Variable(var) => {
177                write!(writer, "?{}", var.name()).map_err(FormatError::from)?;
178            }
179        }
180        Ok(())
181    }
182
183    fn write_predicate<W: Write>(
184        &self,
185        predicate: PredicateRef<'_>,
186        writer: &mut W,
187    ) -> Result<(), FormatError> {
188        match predicate {
189            PredicateRef::NamedNode(node) => {
190                // Check for rdf:type abbreviation
191                if node.as_str() == "http://www.w3.org/1999/02/22-rdf-syntax-ns#type" {
192                    write!(writer, "a").map_err(FormatError::from)?;
193                } else {
194                    self.write_named_node(node, writer)?;
195                }
196            }
197            PredicateRef::Variable(var) => {
198                write!(writer, "?{}", var.name()).map_err(FormatError::from)?;
199            }
200        }
201        Ok(())
202    }
203
204    fn write_object<W: Write>(
205        &self,
206        object: ObjectRef<'_>,
207        writer: &mut W,
208    ) -> Result<(), FormatError> {
209        match object {
210            ObjectRef::NamedNode(node) => self.write_named_node(node, writer)?,
211            ObjectRef::BlankNode(node) => {
212                let id = node.as_str();
213                let id = id.strip_prefix("_:").unwrap_or(id);
214                write!(writer, "_:{}", id).map_err(FormatError::from)?;
215            }
216            ObjectRef::Literal(literal) => self.write_literal(literal, writer)?,
217            ObjectRef::Variable(var) => {
218                write!(writer, "?{}", var.name()).map_err(FormatError::from)?;
219            }
220        }
221        Ok(())
222    }
223
224    fn write_named_node<W: Write>(
225        &self,
226        node: &NamedNode,
227        writer: &mut W,
228    ) -> Result<(), FormatError> {
229        let iri = node.as_str();
230
231        // Try to use a prefix
232        for (prefix, namespace) in &self.prefixes {
233            if let Some(local) = iri.strip_prefix(namespace) {
234                write!(writer, "{}:{}", prefix, local).map_err(FormatError::from)?;
235                return Ok(());
236            }
237        }
238
239        // Use full IRI
240        write!(writer, "<{}>", iri).map_err(FormatError::from)?;
241        Ok(())
242    }
243
244    fn write_literal<W: Write>(
245        &self,
246        literal: &Literal,
247        writer: &mut W,
248    ) -> Result<(), FormatError> {
249        let value = literal.value();
250        let escaped = self.escape_string(value);
251
252        write!(writer, "\"{}\"", escaped).map_err(FormatError::from)?;
253
254        if let Some(lang) = literal.language() {
255            write!(writer, "@{}", lang).map_err(FormatError::from)?;
256        } else {
257            let datatype = literal.datatype();
258            if datatype.as_str() != "http://www.w3.org/2001/XMLSchema#string" {
259                write!(writer, "^^").map_err(FormatError::from)?;
260                self.write_named_node(&datatype.into_owned(), writer)?;
261            }
262        }
263
264        Ok(())
265    }
266
267    fn escape_string(&self, s: &str) -> String {
268        let mut result = String::with_capacity(s.len());
269        for ch in s.chars() {
270            match ch {
271                '\\' => result.push_str("\\\\"),
272                '\"' => result.push_str("\\\""),
273                '\n' => result.push_str("\\n"),
274                '\r' => result.push_str("\\r"),
275                '\t' => result.push_str("\\t"),
276                c if c.is_control() => {
277                    result.push_str(&format!("\\u{:04X}", c as u32));
278                }
279                c => result.push(c),
280            }
281        }
282        result
283    }
284
285    fn group_quads_by_graph<'a>(&self, quads: &'a [Quad]) -> BTreeMap<GraphName, Vec<&'a Quad>> {
286        let mut grouped = BTreeMap::new();
287
288        for quad in quads {
289            grouped
290                .entry(quad.graph_name().clone())
291                .or_insert_with(Vec::new)
292                .push(quad);
293        }
294
295        grouped
296    }
297}
298
299impl Default for TriGSerializer {
300    fn default() -> Self {
301        Self::new()
302    }
303}
304
305/// Writer wrapper for TriG serialization
306pub struct TriGWriter<W: Write> {
307    writer: W,
308    serializer: TriGSerializer,
309    buffer: Vec<Quad>,
310}
311
312impl<W: Write> TriGWriter<W> {
313    /// Serialize a single quad (buffered until finish)
314    pub fn serialize_quad(&mut self, quad: QuadRef<'_>) -> Result<(), FormatError> {
315        self.buffer.push(quad.into());
316        Ok(())
317    }
318
319    /// Finish serialization and return the writer
320    pub fn finish(mut self) -> Result<W, FormatError> {
321        self.serializer
322            .serialize_quads(&self.buffer, &mut self.writer)?;
323        Ok(self.writer)
324    }
325}
326
327/// Implement the QuadSerializer trait for integration with the format system
328impl<W: Write> super::serializer::QuadSerializer<W> for TriGWriter<W> {
329    fn serialize_quad(&mut self, quad: QuadRef<'_>) -> super::serializer::QuadSerializeResult {
330        TriGWriter::serialize_quad(self, quad)
331            .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))
332    }
333
334    fn finish(self: Box<Self>) -> super::error::SerializeResult<W> {
335        TriGWriter::finish(*self).map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))
336    }
337}
338
#[cfg(test)]
mod tests {
    use super::*;
    use crate::model::{NamedNode, Object, Quad, Subject, Triple};

    /// Build a named node from an IRI literal that is known to be valid.
    fn iri(value: &str) -> NamedNode {
        NamedNode::new(value).expect("valid IRI")
    }

    /// Serialize `quads` with a default serializer and return the document text.
    fn render(quads: &[Quad]) -> String {
        let mut sink = Vec::new();
        TriGSerializer::new()
            .serialize_quads(quads, &mut sink)
            .expect("operation should succeed");
        String::from_utf8(sink).expect("bytes should be valid UTF-8")
    }

    #[test]
    fn test_trig_serialize_default_graph() {
        let triple = Triple::new(
            Subject::NamedNode(iri("http://example.org/subject")),
            iri("http://example.org/predicate"),
            Object::NamedNode(iri("http://example.org/object")),
        );

        let output = render(&[Quad::from(triple)]);

        // All three terms must appear as full IRIs.
        for expected in [
            "<http://example.org/subject>",
            "<http://example.org/predicate>",
            "<http://example.org/object>",
        ] {
            assert!(output.contains(expected));
        }
    }

    #[test]
    fn test_trig_serialize_named_graph() {
        let quad = Quad::new(
            Subject::NamedNode(iri("http://example.org/subject")),
            iri("http://example.org/predicate"),
            Object::NamedNode(iri("http://example.org/object")),
            GraphName::NamedNode(iri("http://example.org/graph")),
        );

        let output = render(&[quad]);

        // The graph label and the braces of its block must be present.
        assert!(output.contains("<http://example.org/graph>"));
        assert!(output.contains("{"));
        assert!(output.contains("}"));
    }
}
388}