Skip to main content

oxirs_core/rdfxml/
parser_lexer.rs

1//! Public-facing RDF/XML parser types — tokenizer / lexical phase.
2//!
3//! Contains `RdfXmlParser` (the entry-point builder), reader/slice/async
4//! parser wrappers, and the prefix iterator `RdfXmlPrefixesIter`.
5
6use crate::model::{NamedOrBlankNode, Term, Triple};
7use crate::rdfxml::error::{RdfXmlParseError, RdfXmlSyntaxError};
8use crate::rdfxml::parser_types::InternalRdfXmlParser;
9use oxiri::{Iri, IriParseError};
10use quick_xml::escape::unescape_with;
11use quick_xml::name::{NamespaceBindingsIter, PrefixDeclaration};
12use quick_xml::{Decoder, NsReader};
13use std::borrow::Cow;
14use std::collections::{HashMap, HashSet};
15use std::io::{BufReader, Read};
16#[cfg(feature = "async-tokio")]
17use tokio::io::{AsyncRead, BufReader as AsyncBufReader};
18
19use crate::rdfxml::utils::is_nc_name;
20
21impl From<NamedOrBlankNode> for Term {
22    fn from(node: NamedOrBlankNode) -> Self {
23        match node {
24            NamedOrBlankNode::NamedNode(n) => Term::NamedNode(n),
25            NamedOrBlankNode::BlankNode(n) => Term::BlankNode(n),
26        }
27    }
28}
29
/// An [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) streaming parser.
///
/// It reads the file in a streaming fashion.
/// It does not keep data in memory except a stack for handling nested XML tags, and a set of all
/// seen `rdf:ID`s to detect duplicate ids and fail according to the specification.
///
/// Its performance is not optimized yet and could hopefully be significantly improved by reducing
/// the number of allocations and copies done by the parser.
///
/// Count the number of people:
/// ```
/// use oxirs_core::model::NamedNode;
/// use oxirs_core::{Predicate, Object};
/// use oxirs_core::rdfxml::RdfXmlParser;
///
/// let file = br#"<?xml version="1.0"?>
/// <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:schema="http://schema.org/">
///  <rdf:Description rdf:about="http://example.com/foo">
///    <rdf:type rdf:resource="http://schema.org/Person" />
///    <schema:name>Foo</schema:name>
///  </rdf:Description>
///  <schema:Person rdf:about="http://example.com/bar" schema:name="Bar" />
/// </rdf:RDF>"#;
///
/// let schema_person = NamedNode::new("http://schema.org/Person").expect("valid IRI");
/// let rdf_type = NamedNode::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type").expect("valid IRI");
/// let mut count = 0;
/// for triple in RdfXmlParser::new().for_reader(file.as_ref()) {
///     let triple = triple.expect("triple should be valid");
///     if matches!(triple.predicate(), oxirs_core::Predicate::NamedNode(n) if n == &rdf_type) && matches!(triple.object(), oxirs_core::Object::NamedNode(n) if n == &schema_person) {
///         count += 1;
///     }
/// }
/// assert_eq!(2, count);
/// ```
#[derive(Default, Clone)]
#[must_use]
pub struct RdfXmlParser {
    // Set by `lenient()`; forwarded to the internal parser and to the prefix iterators.
    pub(super) lenient: bool,
    // Base IRI supplied through `with_base_iri()`, if any; becomes the base of the
    // initial document state built in `into_internal()`.
    pub(super) base: Option<Iri<String>>,
}
71
72impl RdfXmlParser {
73    /// Builds a new [`RdfXmlParser`].
74    #[inline]
75    pub fn new() -> Self {
76        Self::default()
77    }
78
79    /// Assumes the file is valid to make parsing faster.
80    ///
81    /// It will skip some validations.
82    ///
83    /// Note that if the file is actually not valid, the parser might emit broken RDF.
84    #[inline]
85    pub fn lenient(mut self) -> Self {
86        self.lenient = true;
87        self
88    }
89
90    #[deprecated(note = "Use `lenient()` instead", since = "0.2.0")]
91    #[inline]
92    pub fn unchecked(self) -> Self {
93        self.lenient()
94    }
95
96    #[inline]
97    pub fn with_base_iri(mut self, base_iri: impl Into<String>) -> Result<Self, IriParseError> {
98        self.base = Some(Iri::parse(base_iri.into())?);
99        Ok(self)
100    }
101
102    /// Parses a RDF/XML file from a [`Read`] implementation.
103    ///
104    /// Count the number of people:
105    /// ```
106    /// use oxirs_core::model::NamedNode;
107    /// use oxirs_core::{Predicate, Object};
108    /// use oxirs_core::rdfxml::RdfXmlParser;
109    ///
110    /// let file = br#"<?xml version="1.0"?>
111    /// <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:schema="http://schema.org/">
112    ///  <rdf:Description rdf:about="http://example.com/foo">
113    ///    <rdf:type rdf:resource="http://schema.org/Person" />
114    ///    <schema:name>Foo</schema:name>
115    ///  </rdf:Description>
116    ///  <schema:Person rdf:about="http://example.com/bar" schema:name="Bar" />
117    /// </rdf:RDF>"#;
118    ///
119    /// let schema_person = NamedNode::new("http://schema.org/Person").expect("valid IRI");
120    /// let rdf_type = NamedNode::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type").expect("valid IRI");
121    /// let mut count = 0;
122    /// for triple in RdfXmlParser::new().for_reader(file.as_ref()) {
123    ///     let triple = triple.expect("triple should be valid");
124    ///     if matches!(triple.predicate(), oxirs_core::Predicate::NamedNode(n) if n == &rdf_type) && matches!(triple.object(), oxirs_core::Object::NamedNode(n) if n == &schema_person) {
125    ///         count += 1;
126    ///     }
127    /// }
128    /// assert_eq!(2, count);
129    /// ```
130    pub fn for_reader<R: Read>(self, reader: R) -> ReaderRdfXmlParser<R> {
131        ReaderRdfXmlParser {
132            results: Vec::new(),
133            parser: self.into_internal(BufReader::new(reader)),
134            reader_buffer: Vec::default(),
135        }
136    }
137
138    /// Parses a RDF/XML file from a [`AsyncRead`] implementation.
139    ///
140    /// Count the number of people:
141    /// ```
142    /// # #[tokio::main(flavor = "current_thread")]
143    /// # async fn main() -> Result<(), Box<dyn std::error::Error>> {
144    /// use oxirs_core::model::NamedNode;
145    /// use oxirs_core::{Predicate, Object};
146    /// use oxirs_core::rdfxml::RdfXmlParser;
147    ///
148    /// let file = br#"<?xml version="1.0"?>
149    /// <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:schema="http://schema.org/">
150    ///   <rdf:Description rdf:about="http://example.com/foo">
151    ///     <rdf:type rdf:resource="http://schema.org/Person" />
152    ///     <schema:name>Foo</schema:name>
153    ///   </rdf:Description>
154    ///   <schema:Person rdf:about="http://example.com/bar" schema:name="Bar" />
155    /// </rdf:RDF>"#;
156    ///
157    /// let schema_person = NamedNode::new("http://schema.org/Person").expect("valid IRI");
158    /// let rdf_type = NamedNode::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type").expect("valid IRI");
159    /// let mut count = 0;
160    /// let mut parser = RdfXmlParser::new().for_tokio_async_reader(file.as_ref());
161    /// while let Some(triple) = parser.next().await {
162    ///     let triple = triple.expect("triple should be valid");
163    ///     if matches!(triple.predicate(), oxirs_core::Predicate::NamedNode(n) if n == &rdf_type) && matches!(triple.object(), oxirs_core::Object::NamedNode(n) if n == &schema_person) {
164    ///         count += 1;
165    ///     }
166    /// }
167    /// assert_eq!(2, count);
168    /// # Ok(())
169    /// # }
170    /// ```
171    #[cfg(feature = "async-tokio")]
172    pub fn for_tokio_async_reader<R: AsyncRead + Unpin>(
173        self,
174        reader: R,
175    ) -> TokioAsyncReaderRdfXmlParser<R> {
176        TokioAsyncReaderRdfXmlParser {
177            results: Vec::new(),
178            parser: self.into_internal(AsyncBufReader::new(reader)),
179            reader_buffer: Vec::default(),
180        }
181    }
182
183    /// Parses a RDF/XML file from a byte slice.
184    ///
185    /// Count the number of people:
186    /// ```
187    /// use oxirs_core::model::NamedNode;
188    /// use oxirs_core::{Predicate, Object};
189    /// use oxirs_core::rdfxml::RdfXmlParser;
190    ///
191    /// let file = br#"<?xml version="1.0"?>
192    /// <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:schema="http://schema.org/">
193    ///  <rdf:Description rdf:about="http://example.com/foo">
194    ///    <rdf:type rdf:resource="http://schema.org/Person" />
195    ///    <schema:name>Foo</schema:name>
196    ///  </rdf:Description>
197    ///  <schema:Person rdf:about="http://example.com/bar" schema:name="Bar" />
198    /// </rdf:RDF>"#;
199    ///
200    /// let schema_person = NamedNode::new("http://schema.org/Person").expect("valid IRI");
201    /// let rdf_type = NamedNode::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type").expect("valid IRI");
202    /// let mut count = 0;
203    /// for triple in RdfXmlParser::new().for_slice(file) {
204    ///     let triple = triple.expect("triple should be valid");
205    ///     if matches!(triple.predicate(), oxirs_core::Predicate::NamedNode(n) if n == &rdf_type) && matches!(triple.object(), oxirs_core::Object::NamedNode(n) if n == &schema_person) {
206    ///         count += 1;
207    ///     }
208    /// }
209    /// assert_eq!(2, count);
210    /// ```
211    pub fn for_slice(self, slice: &[u8]) -> SliceRdfXmlParser<'_> {
212        SliceRdfXmlParser {
213            results: Vec::new(),
214            parser: self.into_internal(slice),
215            reader_buffer: Vec::default(),
216        }
217    }
218
219    pub(super) fn into_internal<T>(self, reader: T) -> InternalRdfXmlParser<T> {
220        use crate::rdfxml::parser_types::RdfXmlState;
221        let mut reader = NsReader::from_reader(reader);
222        reader.config_mut().expand_empty_elements = true;
223        InternalRdfXmlParser {
224            reader,
225            state: vec![RdfXmlState::Doc {
226                base_iri: self.base.clone(),
227            }],
228            custom_entities: HashMap::new(),
229            in_literal_depth: 0,
230            known_rdf_id: HashSet::default(),
231            is_end: false,
232            lenient: self.lenient,
233        }
234    }
235}
236
/// Parses an RDF/XML file from a [`Read`] implementation.
///
/// Can be built using [`RdfXmlParser::for_reader`].
///
/// Count the number of people:
/// ```
/// use oxirs_core::model::NamedNode;
/// use oxirs_core::{Predicate, Object};
/// use oxirs_core::rdfxml::RdfXmlParser;
///
/// let file = br#"<?xml version="1.0"?>
/// <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:schema="http://schema.org/">
///  <rdf:Description rdf:about="http://example.com/foo">
///    <rdf:type rdf:resource="http://schema.org/Person" />
///    <schema:name>Foo</schema:name>
///  </rdf:Description>
///  <schema:Person rdf:about="http://example.com/bar" schema:name="Bar" />
/// </rdf:RDF>"#;
///
/// let schema_person = NamedNode::new("http://schema.org/Person").expect("valid IRI");
/// let rdf_type = NamedNode::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type").expect("valid IRI");
/// let mut count = 0;
/// for triple in RdfXmlParser::new().for_reader(file.as_ref()) {
///     let triple = triple.expect("triple should be valid");
///     if matches!(triple.predicate(), oxirs_core::Predicate::NamedNode(n) if n == &rdf_type) && matches!(triple.object(), oxirs_core::Object::NamedNode(n) if n == &schema_person) {
///         count += 1;
///     }
/// }
/// assert_eq!(2, count);
/// ```
#[must_use]
pub struct ReaderRdfXmlParser<R: Read> {
    // Triples produced by already-parsed events and not yet handed out; `next()` pops from it.
    results: Vec<Triple>,
    // Internal state machine wrapping the buffered reader.
    parser: InternalRdfXmlParser<BufReader<R>>,
    // Scratch buffer cleared and refilled for each XML event read in `parse_step()`.
    reader_buffer: Vec<u8>,
}
273
274impl<R: Read> Iterator for ReaderRdfXmlParser<R> {
275    type Item = Result<Triple, RdfXmlParseError>;
276
277    fn next(&mut self) -> Option<Self::Item> {
278        loop {
279            if let Some(triple) = self.results.pop() {
280                return Some(Ok(triple));
281            } else if self.parser.is_end {
282                return None;
283            }
284            if let Err(e) = self.parse_step() {
285                return Some(Err(e));
286            }
287        }
288    }
289}
290
291impl<R: Read> ReaderRdfXmlParser<R> {
292    /// The list of IRI prefixes considered at the current step of the parsing.
293    ///
294    /// This method returns (prefix name, prefix value) tuples.
295    /// It is empty at the beginning of the parsing and gets updated when prefixes are encountered.
296    /// It should be full at the end of the parsing (but if a prefix is overridden, only the latest version will be returned).
297    ///
298    /// ```
299    /// use oxirs_core::rdfxml::RdfXmlParser;
300    ///
301    /// let file = br#"<?xml version="1.0"?>
302    /// <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:schema="http://schema.org/">
303    ///  <rdf:Description rdf:about="http://example.com/foo">
304    ///    <rdf:type rdf:resource="http://schema.org/Person" />
305    ///    <schema:name>Foo</schema:name>
306    ///  </rdf:Description>
307    ///  <schema:Person rdf:about="http://example.com/bar" schema:name="Bar" />
308    /// </rdf:RDF>"#;
309    ///
310    /// let mut parser = RdfXmlParser::new().for_reader(file.as_ref());
311    /// assert_eq!(parser.prefixes().collect::<Vec<_>>(), []); // No prefix at the beginning
312    ///
313    /// parser.next().expect("should have next item").expect("operation should succeed"); // We read the first triple
314    /// assert_eq!(
315    ///     parser.prefixes().collect::<Vec<_>>(),
316    ///     [
317    ///         ("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#"),
318    ///         ("schema", "http://schema.org/")
319    ///     ]
320    /// ); // There are now prefixes
321    /// ```
322    pub fn prefixes(&self) -> RdfXmlPrefixesIter<'_> {
323        RdfXmlPrefixesIter {
324            inner: self.parser.reader.resolver().bindings(),
325            decoder: self.parser.reader.decoder(),
326            lenient: self.parser.lenient,
327        }
328    }
329
330    /// The base IRI considered at the current step of the parsing.
331    ///
332    /// ```
333    /// use oxirs_core::rdfxml::RdfXmlParser;
334    ///
335    /// let file = br#"<?xml version="1.0"?>
336    /// <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xml:base="http://example.com/">
337    ///  <rdf:Description rdf:about="foo">
338    ///    <rdf:type rdf:resource="http://schema.org/Person" />
339    ///  </rdf:Description>
340    /// </rdf:RDF>"#;
341    ///
342    /// let mut parser = RdfXmlParser::new().for_reader(file.as_ref());
343    /// assert!(parser.base_iri().is_none()); // No base at the beginning because none has been given to the parser.
344    ///
345    /// parser.next().expect("should have next item").expect("operation should succeed"); // We read the first triple
346    /// assert_eq!(parser.base_iri(), Some("http://example.com/")); // There is now a base IRI.
347    /// ```
348    pub fn base_iri(&self) -> Option<&str> {
349        Some(self.parser.current_base_iri()?.as_str())
350    }
351
352    /// The current byte position in the input data.
353    pub fn buffer_position(&self) -> u64 {
354        self.parser.reader.buffer_position()
355    }
356
357    fn parse_step(&mut self) -> Result<(), RdfXmlParseError> {
358        self.reader_buffer.clear();
359        let event = self
360            .parser
361            .reader
362            .read_event_into(&mut self.reader_buffer)?;
363        self.parser.parse_event(event, &mut self.results)
364    }
365}
366
/// Parses an RDF/XML file from an [`AsyncRead`] implementation.
///
/// Can be built using [`RdfXmlParser::for_tokio_async_reader`].
///
/// Count the number of people:
/// ```
/// # #[tokio::main(flavor = "current_thread")]
/// # async fn main() -> Result<(), Box<dyn std::error::Error>> {
/// use oxirs_core::model::NamedNode;
/// use oxirs_core::{Predicate, Object};
/// use oxirs_core::rdfxml::RdfXmlParser;
///
/// let file = br#"<?xml version="1.0"?>
/// <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:schema="http://schema.org/">
///   <rdf:Description rdf:about="http://example.com/foo">
///     <rdf:type rdf:resource="http://schema.org/Person" />
///     <schema:name>Foo</schema:name>
///   </rdf:Description>
///   <schema:Person rdf:about="http://example.com/bar" schema:name="Bar" />
/// </rdf:RDF>"#;
///
/// let schema_person = NamedNode::new("http://schema.org/Person").expect("valid IRI");
/// let rdf_type = NamedNode::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type").expect("valid IRI");
/// let mut count = 0;
/// let mut parser = RdfXmlParser::new().for_tokio_async_reader(file.as_ref());
/// while let Some(triple) = parser.next().await {
///     let triple = triple.expect("triple should be valid");
///     if matches!(triple.predicate(), oxirs_core::Predicate::NamedNode(n) if n == &rdf_type) && matches!(triple.object(), oxirs_core::Object::NamedNode(n) if n == &schema_person) {
///         count += 1;
///     }
/// }
/// assert_eq!(2, count);
/// # Ok(())
/// # }
/// ```
#[cfg(feature = "async-tokio")]
#[must_use]
pub struct TokioAsyncReaderRdfXmlParser<R: AsyncRead + Unpin> {
    // Triples produced by already-parsed events and not yet handed out; `next()` pops from it.
    results: Vec<Triple>,
    // Internal state machine wrapping the buffered asynchronous reader.
    parser: InternalRdfXmlParser<AsyncBufReader<R>>,
    // Scratch buffer cleared and refilled for each XML event read in `parse_step()`.
    reader_buffer: Vec<u8>,
}
409
#[cfg(feature = "async-tokio")]
impl<R: AsyncRead + Unpin> TokioAsyncReaderRdfXmlParser<R> {
    /// Reads the next triple or returns `None` if the file is finished.
    ///
    /// A failed parsing step is returned as `Some(Err(_))`.
    pub async fn next(&mut self) -> Option<Result<Triple, RdfXmlParseError>> {
        loop {
            if let Some(triple) = self.results.pop() {
                return Some(Ok(triple));
            } else if self.parser.is_end {
                return None;
            }
            if let Err(e) = self.parse_step().await {
                return Some(Err(e));
            }
        }
    }

    /// The list of IRI prefixes considered at the current step of the parsing.
    ///
    /// This method returns (prefix name, prefix value) tuples.
    /// It is empty at the beginning of the parsing and gets updated when prefixes are encountered.
    /// It should be full at the end of the parsing (but if a prefix is overridden, only the latest version will be returned).
    ///
    /// ```
    /// # #[tokio::main(flavor = "current_thread")]
    /// # async fn main() -> Result<(), Box<dyn std::error::Error>> {
    /// use oxirs_core::rdfxml::RdfXmlParser;
    ///
    /// let file = br#"<?xml version="1.0"?>
    /// <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:schema="http://schema.org/">
    ///  <rdf:Description rdf:about="http://example.com/foo">
    ///    <rdf:type rdf:resource="http://schema.org/Person" />
    ///    <schema:name>Foo</schema:name>
    ///  </rdf:Description>
    ///  <schema:Person rdf:about="http://example.com/bar" schema:name="Bar" />
    /// </rdf:RDF>"#;
    ///
    /// let mut parser = RdfXmlParser::new().for_tokio_async_reader(file.as_ref());
    /// assert_eq!(parser.prefixes().collect::<Vec<_>>(), []); // No prefix at the beginning
    ///
    /// parser.next().await.expect("async operation should succeed").expect("operation should succeed"); // We read the first triple
    /// assert_eq!(
    ///     parser.prefixes().collect::<Vec<_>>(),
    ///     [
    ///         ("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#"),
    ///         ("schema", "http://schema.org/")
    ///     ]
    /// ); // There are now prefixes
    /// # Ok(())
    /// # }
    /// ```
    pub fn prefixes(&self) -> RdfXmlPrefixesIter<'_> {
        RdfXmlPrefixesIter {
            inner: self.parser.reader.resolver().bindings(),
            decoder: self.parser.reader.decoder(),
            lenient: self.parser.lenient,
        }
    }

    /// The base IRI considered at the current step of the parsing.
    ///
    /// ```
    /// # #[tokio::main(flavor = "current_thread")]
    /// # async fn main() -> Result<(), Box<dyn std::error::Error>> {
    /// use oxirs_core::rdfxml::RdfXmlParser;
    ///
    /// let file = br#"<?xml version="1.0"?>
    /// <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xml:base="http://example.com/">
    ///  <rdf:Description rdf:about="foo">
    ///    <rdf:type rdf:resource="http://schema.org/Person" />
    ///  </rdf:Description>
    /// </rdf:RDF>"#;
    ///
    /// let mut parser = RdfXmlParser::new().for_tokio_async_reader(file.as_ref());
    /// assert!(parser.base_iri().is_none()); // No base at the beginning because none has been given to the parser.
    ///
    /// parser.next().await.expect("async operation should succeed").expect("operation should succeed"); // We read the first triple
    /// assert_eq!(parser.base_iri(), Some("http://example.com/")); // There is now a base IRI.
    /// # Ok(())
    /// # }
    /// ```
    pub fn base_iri(&self) -> Option<&str> {
        Some(self.parser.current_base_iri()?.as_str())
    }

    /// The current byte position in the input data.
    pub fn buffer_position(&self) -> u64 {
        self.parser.reader.buffer_position()
    }

    /// Asynchronously reads one XML event and lets the internal parser turn it into zero or
    /// more triples, which are appended to `results`.
    async fn parse_step(&mut self) -> Result<(), RdfXmlParseError> {
        // The buffer only backs the event read below, so it can be reset on every step.
        self.reader_buffer.clear();
        let event = self
            .parser
            .reader
            .read_event_into_async(&mut self.reader_buffer)
            .await?;
        self.parser.parse_event(event, &mut self.results)
    }
}
510
/// Parses an RDF/XML file from a byte slice.
///
/// Can be built using [`RdfXmlParser::for_slice`].
///
/// Count the number of people:
/// ```
/// use oxirs_core::model::NamedNode;
/// use oxirs_core::{Predicate, Object};
/// use oxirs_core::rdfxml::RdfXmlParser;
///
/// let file = br#"<?xml version="1.0"?>
/// <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:schema="http://schema.org/">
///  <rdf:Description rdf:about="http://example.com/foo">
///    <rdf:type rdf:resource="http://schema.org/Person" />
///    <schema:name>Foo</schema:name>
///  </rdf:Description>
///  <schema:Person rdf:about="http://example.com/bar" schema:name="Bar" />
/// </rdf:RDF>"#;
///
/// let schema_person = NamedNode::new("http://schema.org/Person").expect("valid IRI");
/// let rdf_type = NamedNode::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type").expect("valid IRI");
/// let mut count = 0;
/// for triple in RdfXmlParser::new().for_slice(file) {
///     let triple = triple.expect("triple should be valid");
///     if matches!(triple.predicate(), oxirs_core::Predicate::NamedNode(n) if n == &rdf_type) && matches!(triple.object(), oxirs_core::Object::NamedNode(n) if n == &schema_person) {
///         count += 1;
///     }
/// }
/// assert_eq!(2, count);
/// ```
#[must_use]
pub struct SliceRdfXmlParser<'a> {
    // Triples produced by already-parsed events and not yet handed out; `next()` pops from it.
    results: Vec<Triple>,
    // Internal state machine reading directly from the borrowed input slice.
    parser: InternalRdfXmlParser<&'a [u8]>,
    // Scratch buffer cleared and refilled for each XML event read in `parse_step()`.
    reader_buffer: Vec<u8>,
}
547
548impl Iterator for SliceRdfXmlParser<'_> {
549    type Item = Result<Triple, RdfXmlSyntaxError>;
550
551    fn next(&mut self) -> Option<Self::Item> {
552        loop {
553            if let Some(triple) = self.results.pop() {
554                return Some(Ok(triple));
555            } else if self.parser.is_end {
556                return None;
557            }
558            if let Err(RdfXmlParseError::Syntax(e)) = self.parse_step() {
559                // I/O errors can't happen
560                return Some(Err(e));
561            }
562        }
563    }
564}
565
566impl SliceRdfXmlParser<'_> {
567    /// The list of IRI prefixes considered at the current step of the parsing.
568    ///
569    /// This method returns (prefix name, prefix value) tuples.
570    /// It is empty at the beginning of the parsing and gets updated when prefixes are encountered.
571    /// It should be full at the end of the parsing (but if a prefix is overridden, only the latest version will be returned).
572    ///
573    /// ```
574    /// use oxirs_core::rdfxml::RdfXmlParser;
575    ///
576    /// let file = br#"<?xml version="1.0"?>
577    /// <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:schema="http://schema.org/">
578    ///  <rdf:Description rdf:about="http://example.com/foo">
579    ///    <rdf:type rdf:resource="http://schema.org/Person" />
580    ///    <schema:name>Foo</schema:name>
581    ///  </rdf:Description>
582    ///  <schema:Person rdf:about="http://example.com/bar" schema:name="Bar" />
583    /// </rdf:RDF>"#;
584    ///
585    /// let mut parser = RdfXmlParser::new().for_slice(file);
586    /// assert_eq!(parser.prefixes().collect::<Vec<_>>(), []); // No prefix at the beginning
587    ///
588    /// parser.next().expect("should have next item").expect("operation should succeed"); // We read the first triple
589    /// assert_eq!(
590    ///     parser.prefixes().collect::<Vec<_>>(),
591    ///     [
592    ///         ("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#"),
593    ///         ("schema", "http://schema.org/")
594    ///     ]
595    /// ); // There are now prefixes
596    /// ```
597    pub fn prefixes(&self) -> RdfXmlPrefixesIter<'_> {
598        RdfXmlPrefixesIter {
599            inner: self.parser.reader.resolver().bindings(),
600            decoder: self.parser.reader.decoder(),
601            lenient: self.parser.lenient,
602        }
603    }
604
605    /// The base IRI considered at the current step of the parsing.
606    ///
607    /// ```
608    /// use oxirs_core::rdfxml::RdfXmlParser;
609    ///
610    /// let file = br#"<?xml version="1.0"?>
611    /// <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xml:base="http://example.com/">
612    ///  <rdf:Description rdf:about="foo">
613    ///    <rdf:type rdf:resource="http://schema.org/Person" />
614    ///  </rdf:Description>
615    /// </rdf:RDF>"#;
616    ///
617    /// let mut parser = RdfXmlParser::new().for_slice(file);
618    /// assert!(parser.base_iri().is_none()); // No base at the beginning because none has been given to the parser.
619    ///
620    /// parser.next().expect("should have next item").expect("operation should succeed"); // We read the first triple
621    /// assert_eq!(parser.base_iri(), Some("http://example.com/")); // There is now a base IRI.
622    /// ```
623    pub fn base_iri(&self) -> Option<&str> {
624        Some(self.parser.current_base_iri()?.as_str())
625    }
626
627    /// The current byte position in the input data.
628    pub fn buffer_position(&self) -> u64 {
629        self.parser.reader.buffer_position()
630    }
631
632    fn parse_step(&mut self) -> Result<(), RdfXmlParseError> {
633        self.reader_buffer.clear();
634        let event = self
635            .parser
636            .reader
637            .read_event_into(&mut self.reader_buffer)?;
638        self.parser.parse_event(event, &mut self.results)
639    }
640}
641
/// Iterator over the prefixes declared in the file, as (prefix name, prefix value) pairs.
///
/// See [`ReaderRdfXmlParser::prefixes`].
pub struct RdfXmlPrefixesIter<'a> {
    // Namespace bindings currently known to the XML reader's resolver.
    inner: NamespaceBindingsIter<'a>,
    // Decoder of the XML reader, used to turn raw binding bytes into `&str`.
    decoder: Decoder,
    // Copied from the parser; when `true`, prefix names and values are not validated.
    lenient: bool,
}
650
651impl<'a> Iterator for RdfXmlPrefixesIter<'a> {
652    type Item = (&'a str, &'a str);
653
654    #[inline]
655    fn next(&mut self) -> Option<Self::Item> {
656        loop {
657            let (key, value) = self.inner.next()?;
658            return Some((
659                match key {
660                    PrefixDeclaration::Default => "",
661                    PrefixDeclaration::Named(name) => {
662                        let Ok(Cow::Borrowed(name)) = self.decoder.decode(name) else {
663                            continue;
664                        };
665                        let Ok(Cow::Borrowed(name)) = unescape_with(name, |_| None) else {
666                            continue;
667                        };
668                        if !self.lenient && !is_nc_name(name) {
669                            continue; // We don't return invalid prefixes
670                        }
671                        name
672                    }
673                },
674                {
675                    let Ok(Cow::Borrowed(value)) = self.decoder.decode(value.0) else {
676                        continue;
677                    };
678                    let Ok(Cow::Borrowed(value)) = unescape_with(value, |_| None) else {
679                        continue;
680                    };
681                    if !self.lenient && Iri::parse(value).is_err() {
682                        continue; // We don't return invalid prefixes
683                    }
684                    value
685                },
686            ));
687        }
688    }
689
690    #[inline]
691    fn size_hint(&self) -> (usize, Option<usize>) {
692        self.inner.size_hint()
693    }
694}