Skip to main content

oxirs_core/format/parser/
mod.rs

1//! Unified RDF Parser Interface
2//!
3//! Provides a consistent API for parsing all supported RDF formats.
4//! Extracted and adapted from OxiGraph with OxiRS enhancements.
5
6pub(crate) mod helpers;
7mod jsonld;
8mod ntriples;
9mod rdfxml;
10mod turtle;
11
12use super::error::ParseResult;
13use super::format::RdfFormat;
14use crate::model::Quad;
15use std::io::Read;
16
/// Result type for quad parsing operations.
///
/// Alias for [`ParseResult`] specialised to [`Quad`]. This is the item type
/// yielded by both [`ReaderQuadParser`] and [`SliceQuadParser`].
pub type QuadParseResult = ParseResult<Quad>;
19
20/// Iterator over parsed quads from a reader
21pub struct ReaderQuadParser<'a, R: Read> {
22    inner: Box<dyn Iterator<Item = QuadParseResult> + Send + 'a>,
23    _phantom: std::marker::PhantomData<R>,
24}
25
26impl<'a, R: Read> ReaderQuadParser<'a, R> {
27    /// Create a new reader parser
28    pub fn new(iter: Box<dyn Iterator<Item = QuadParseResult> + Send + 'a>) -> Self {
29        Self {
30            inner: iter,
31            _phantom: std::marker::PhantomData,
32        }
33    }
34}
35
36impl<'a, R: Read> Iterator for ReaderQuadParser<'a, R> {
37    type Item = QuadParseResult;
38
39    fn next(&mut self) -> Option<Self::Item> {
40        self.inner.next()
41    }
42}
43
44/// Iterator over parsed quads from a byte slice
45pub struct SliceQuadParser<'a> {
46    inner: Box<dyn Iterator<Item = QuadParseResult> + 'a>,
47}
48
49impl<'a> SliceQuadParser<'a> {
50    /// Create a new slice parser
51    pub fn new(iter: Box<dyn Iterator<Item = QuadParseResult> + 'a>) -> Self {
52        Self { inner: iter }
53    }
54}
55
56impl<'a> Iterator for SliceQuadParser<'a> {
57    type Item = QuadParseResult;
58
59    fn next(&mut self) -> Option<Self::Item> {
60        self.inner.next()
61    }
62}
63
64/// Unified RDF parser supporting all formats
65#[derive(Debug, Clone)]
66pub struct RdfParser {
67    format: RdfFormat,
68    base_iri: Option<String>,
69    prefixes: std::collections::HashMap<String, String>,
70    lenient: bool,
71}
72
73impl RdfParser {
74    /// Create a new parser for the specified format
75    pub fn new(format: RdfFormat) -> Self {
76        Self {
77            format,
78            base_iri: None,
79            prefixes: std::collections::HashMap::new(),
80            lenient: false,
81        }
82    }
83
84    /// Set the base IRI for resolving relative IRIs
85    pub fn with_base_iri(mut self, base_iri: impl Into<String>) -> Self {
86        self.base_iri = Some(base_iri.into());
87        self
88    }
89
90    /// Add a namespace prefix
91    pub fn with_prefix(mut self, prefix: impl Into<String>, iri: impl Into<String>) -> Self {
92        self.prefixes.insert(prefix.into(), iri.into());
93        self
94    }
95
96    /// Enable lenient parsing (skip some validations for performance)
97    pub fn lenient(mut self) -> Self {
98        self.lenient = true;
99        self
100    }
101
102    /// Parse from a reader
103    pub fn for_reader<R: Read + Send + 'static>(self, reader: R) -> ReaderQuadParser<'static, R> {
104        match self.format {
105            RdfFormat::Turtle => turtle::parse_reader(self, reader),
106            RdfFormat::NTriples => ntriples::parse_ntriples_reader(self, reader),
107            RdfFormat::NQuads => ntriples::parse_nquads_reader(self, reader),
108            RdfFormat::TriG => ntriples::parse_trig_reader(self, reader),
109            RdfFormat::RdfXml => rdfxml::parse_reader(self, reader),
110            RdfFormat::JsonLd { .. } => jsonld::parse_reader(self, reader),
111            RdfFormat::N3 => ntriples::parse_n3_reader(self, reader),
112        }
113    }
114
115    /// Parse from a byte slice
116    pub fn for_slice<'a>(self, slice: &'a [u8]) -> SliceQuadParser<'a> {
117        match self.format {
118            RdfFormat::Turtle => turtle::parse_slice(self, slice),
119            RdfFormat::NTriples => ntriples::parse_ntriples_slice(self, slice),
120            RdfFormat::NQuads => ntriples::parse_nquads_slice(self, slice),
121            RdfFormat::TriG => ntriples::parse_trig_slice(self, slice),
122            RdfFormat::RdfXml => rdfxml::parse_slice(self, slice),
123            RdfFormat::JsonLd { .. } => jsonld::parse_slice(self, slice),
124            RdfFormat::N3 => ntriples::parse_n3_slice(self, slice),
125        }
126    }
127
128    /// Get the format being parsed
129    pub fn format(&self) -> RdfFormat {
130        self.format.clone()
131    }
132
133    /// Get the base IRI
134    pub fn base_iri(&self) -> Option<&str> {
135        self.base_iri.as_deref()
136    }
137
138    /// Get the prefixes
139    pub fn prefixes(&self) -> &std::collections::HashMap<String, String> {
140        &self.prefixes
141    }
142
143    /// Check if lenient parsing is enabled
144    pub fn is_lenient(&self) -> bool {
145        self.lenient
146    }
147}
148
149impl Default for RdfParser {
150    fn default() -> Self {
151        Self::new(RdfFormat::Turtle)
152    }
153}