Skip to main content

oxirs_core/format/
mod.rs

//! RDF Format Support Module
//!
//! Phase 3 of OxiGraph extraction: Complete format support for all major RDF serializations.
//! Extracted and adapted from OxiGraph format libraries with OxiRS enhancements.
//!
//! Provides unified parsing and serialization for:
//! - Turtle (.ttl)
//! - N-Triples (.nt)
//! - N-Quads (.nq)
//! - TriG (.trig)
//! - RDF/XML (.rdf, .xml)
//! - JSON-LD (.jsonld)
//! - N3 (.n3)
14
15pub mod error;
16#[allow(clippy::module_inception)]
17pub mod format;
18pub mod jsonld;
19pub mod n3;
20pub mod n3_lexer;
21pub mod nquads;
22pub mod ntriples;
23pub mod parser;
24pub mod rdfxml;
25pub mod serializer;
26pub mod toolkit;
27pub mod trig;
28pub mod turtle;
29pub mod turtle_grammar;
30pub mod w3c_tests;
31
32// Re-export key types
33pub use error::{FormatError, RdfParseError, RdfSyntaxError, TextPosition};
34pub use format::RdfFormat;
35pub use parser::{QuadParseResult, RdfParser, ReaderQuadParser, SliceQuadParser};
36pub use serializer::{QuadSerializeResult, RdfSerializer, WriterQuadSerializer};
37
38// Format-specific re-exports
39pub use format::{JsonLdProfile, JsonLdProfileSet};
40pub use jsonld::{JsonLdParser, JsonLdSerializer};
41pub use n3::N3Serializer;
42pub use nquads::NQuadsSerializer;
43pub use ntriples::{NTriplesParser, NTriplesSerializer};
44pub use rdfxml::{RdfXmlParser, RdfXmlSerializer};
45pub use trig::TriGSerializer;
46pub use turtle::{TurtleParser, TurtleSerializer};
47
48// W3C compliance testing
49pub use w3c_tests::{
50    run_w3c_compliance_tests, RdfComplianceStats, RdfTestResult, RdfTestStatus, RdfTestType,
51    W3cRdfTestConfig, W3cRdfTestSuiteRunner,
52};
53
54use crate::model::{Quad, Triple};
55use crate::OxirsError;
56use std::io::{Read, Write};
57
58/// Result type for format operations
59pub type FormatResult<T> = Result<T, FormatError>;
60
61/// Trait for format detection from content or metadata
62pub trait FormatDetection {
63    /// Detect format from file extension
64    fn from_extension(extension: &str) -> Option<RdfFormat>;
65
66    /// Detect format from media type
67    fn from_media_type(media_type: &str) -> Option<RdfFormat>;
68
69    /// Detect format from content analysis (magic bytes, syntax patterns)
70    fn from_content(content: &[u8]) -> Option<RdfFormat>;
71
72    /// Detect format from filename
73    fn from_filename(filename: &str) -> Option<RdfFormat> {
74        std::path::Path::new(filename)
75            .extension()
76            .and_then(|ext| ext.to_str())
77            .and_then(Self::from_extension)
78    }
79}
80
81/// Unified RDF format handler combining parsing and serialization
82pub struct FormatHandler {
83    format: RdfFormat,
84}
85
86impl FormatHandler {
87    /// Create a new format handler for the specified format
88    pub fn new(format: RdfFormat) -> Self {
89        Self { format }
90    }
91
92    /// Parse RDF from a reader into quads
93    pub fn parse_quads<R: Read + Send + 'static>(&self, reader: R) -> FormatResult<Vec<Quad>> {
94        let parser = RdfParser::new(self.format.clone());
95        let mut quads = Vec::new();
96
97        for quad_result in parser.for_reader(reader) {
98            quads.push(quad_result?);
99        }
100
101        Ok(quads)
102    }
103
104    /// Parse RDF from a reader into triples (only default graph)
105    pub fn parse_triples<R: Read + Send + 'static>(&self, reader: R) -> FormatResult<Vec<Triple>> {
106        let quads = self.parse_quads(reader)?;
107        Ok(quads
108            .into_iter()
109            .filter_map(|quad| quad.triple_in_default_graph())
110            .collect())
111    }
112
113    /// Serialize quads to a writer
114    pub fn serialize_quads<W: Write + 'static>(
115        &self,
116        writer: W,
117        quads: &[Quad],
118    ) -> FormatResult<()> {
119        let mut serializer = RdfSerializer::new(self.format.clone()).for_writer(writer);
120
121        for quad in quads {
122            serializer.serialize_quad(quad.as_ref())?;
123        }
124
125        serializer.finish()?;
126        Ok(())
127    }
128
129    /// Serialize triples to a writer (places in default graph)
130    pub fn serialize_triples<W: Write + 'static>(
131        &self,
132        writer: W,
133        triples: &[Triple],
134    ) -> FormatResult<()> {
135        let quads: Vec<Quad> = triples.iter().map(|triple| triple.clone().into()).collect();
136        self.serialize_quads(writer, &quads)
137    }
138
139    /// Get the format
140    pub fn format(&self) -> RdfFormat {
141        self.format.clone()
142    }
143}
144
145impl FormatDetection for FormatHandler {
146    fn from_extension(extension: &str) -> Option<RdfFormat> {
147        RdfFormat::from_extension(extension)
148    }
149
150    fn from_media_type(media_type: &str) -> Option<RdfFormat> {
151        RdfFormat::from_media_type(media_type)
152    }
153
154    fn from_content(content: &[u8]) -> Option<RdfFormat> {
155        // Simple heuristics for format detection
156        let content_str = std::str::from_utf8(content).ok()?;
157        let content_lower = content_str.to_lowercase();
158
159        // Check for XML-like structures (RDF/XML)
160        if content_lower.contains("<?xml") || content_lower.contains("<rdf:") {
161            return Some(RdfFormat::RdfXml);
162        }
163
164        // Check for JSON-LD
165        if content_lower.trim_start().starts_with('{')
166            && (content_lower.contains("@context") || content_lower.contains("@type"))
167        {
168            return Some(RdfFormat::JsonLd {
169                profile: JsonLdProfileSet::empty(),
170            });
171        }
172
173        // Check for Turtle-family formats
174        if content_lower.contains("@prefix") || content_lower.contains("@base") {
175            if content_lower.contains("graph") {
176                return Some(RdfFormat::TriG);
177            }
178            return Some(RdfFormat::Turtle);
179        }
180
181        // Check for N-Quads (4 terms per line)
182        let lines: Vec<&str> = content_str.lines().take(10).collect();
183        if lines.iter().any(|line| {
184            let parts: Vec<&str> = line.split_whitespace().collect();
185            parts.len() >= 4 && line.ends_with(" .")
186        }) {
187            return Some(RdfFormat::NQuads);
188        }
189
190        // Check for N-Triples (3 terms per line)
191        if lines.iter().any(|line| {
192            let parts: Vec<&str> = line.split_whitespace().collect();
193            parts.len() >= 3 && line.ends_with(" .")
194        }) {
195            return Some(RdfFormat::NTriples);
196        }
197
198        None
199    }
200}
201
202/// Convert OxiRS errors to format errors
203impl From<OxirsError> for FormatError {
204    fn from(err: OxirsError) -> Self {
205        FormatError::InvalidData(err.to_string())
206    }
207}
208
#[cfg(test)]
mod tests {
    use super::*;

    /// Known extensions map to their formats; unknown ones map to `None`.
    #[test]
    fn test_format_detection_from_extension() {
        assert_eq!(
            FormatHandler::from_extension("ttl"),
            Some(RdfFormat::Turtle)
        );
        assert_eq!(
            FormatHandler::from_extension("nt"),
            Some(RdfFormat::NTriples)
        );
        assert_eq!(
            FormatHandler::from_extension("jsonld"),
            Some(RdfFormat::JsonLd {
                profile: JsonLdProfileSet::empty()
            })
        );
        assert_eq!(FormatHandler::from_extension("unknown"), None);
    }

    /// Filename detection is driven by the extension of the last path component.
    #[test]
    fn test_format_detection_from_filename() {
        assert_eq!(
            FormatHandler::from_filename("data.ttl"),
            Some(RdfFormat::Turtle)
        );
        assert_eq!(
            FormatHandler::from_filename("some/dir/data.nt"),
            Some(RdfFormat::NTriples)
        );
        assert_eq!(FormatHandler::from_filename("no_extension"), None);
        assert_eq!(FormatHandler::from_filename("data.unknown"), None);
    }

    /// Content sniffing recognises the characteristic markers of each syntax.
    #[test]
    fn test_format_detection_from_content() {
        let turtle_content = b"@prefix ex: <http://example.org/> .\nex:foo ex:bar ex:baz .";
        assert_eq!(
            FormatHandler::from_content(turtle_content),
            Some(RdfFormat::Turtle)
        );

        let jsonld_content = br#"{"@context": "http://example.org/", "@type": "Person"}"#;
        assert_eq!(
            FormatHandler::from_content(jsonld_content),
            Some(RdfFormat::JsonLd {
                profile: JsonLdProfileSet::empty()
            })
        );

        let rdfxml_content = b"<?xml version=\"1.0\"?>\n<rdf:RDF xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">";
        assert_eq!(
            FormatHandler::from_content(rdfxml_content),
            Some(RdfFormat::RdfXml)
        );

        let nquads_content = b"<http://example.org/s> <http://example.org/p> <http://example.org/o> <http://example.org/g> .\n";
        assert_eq!(
            FormatHandler::from_content(nquads_content),
            Some(RdfFormat::NQuads)
        );
    }

    /// Bytes that are not valid UTF-8 cannot be sniffed.
    #[test]
    fn test_format_detection_rejects_invalid_utf8() {
        assert_eq!(FormatHandler::from_content(&[0xff, 0xfe, 0xfd]), None);
    }
}