oxirs_core/rdfxml/parser_lexer.rs
1//! Public-facing RDF/XML parser types — tokenizer / lexical phase.
2//!
3//! Contains `RdfXmlParser` (the entry-point builder), reader/slice/async
4//! parser wrappers, and the prefix iterator `RdfXmlPrefixesIter`.
5
6use crate::model::{NamedOrBlankNode, Term, Triple};
7use crate::rdfxml::error::{RdfXmlParseError, RdfXmlSyntaxError};
8use crate::rdfxml::parser_types::InternalRdfXmlParser;
9use oxiri::{Iri, IriParseError};
10use quick_xml::escape::unescape_with;
11use quick_xml::name::{NamespaceBindingsIter, PrefixDeclaration};
12use quick_xml::{Decoder, NsReader};
13use std::borrow::Cow;
14use std::collections::{HashMap, HashSet};
15use std::io::{BufReader, Read};
16#[cfg(feature = "async-tokio")]
17use tokio::io::{AsyncRead, BufReader as AsyncBufReader};
18
19use crate::rdfxml::utils::is_nc_name;
20
21impl From<NamedOrBlankNode> for Term {
22 fn from(node: NamedOrBlankNode) -> Self {
23 match node {
24 NamedOrBlankNode::NamedNode(n) => Term::NamedNode(n),
25 NamedOrBlankNode::BlankNode(n) => Term::BlankNode(n),
26 }
27 }
28}
29
30/// A [RDF/XML](https://www.w3.org/TR/rdf-syntax-grammar/) streaming parser.
31///
32/// It reads the file in streaming.
33/// It does not keep data in memory except a stack for handling nested XML tags, and a set of all
34/// seen `rdf:ID`s to detect duplicate ids and fail according to the specification.
35///
36/// Its performances are not optimized yet and hopefully could be significantly enhanced by reducing the
37/// number of allocations and copies done by the parser.
38///
39/// Count the number of people:
40/// ```
41/// use oxirs_core::model::NamedNode;
42/// use oxirs_core::{Predicate, Object};
43/// use oxirs_core::rdfxml::RdfXmlParser;
44///
45/// let file = br#"<?xml version="1.0"?>
46/// <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:schema="http://schema.org/">
47/// <rdf:Description rdf:about="http://example.com/foo">
48/// <rdf:type rdf:resource="http://schema.org/Person" />
49/// <schema:name>Foo</schema:name>
50/// </rdf:Description>
51/// <schema:Person rdf:about="http://example.com/bar" schema:name="Bar" />
52/// </rdf:RDF>"#;
53///
54/// let schema_person = NamedNode::new("http://schema.org/Person").expect("valid IRI");
55/// let rdf_type = NamedNode::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type").expect("valid IRI");
56/// let mut count = 0;
57/// for triple in RdfXmlParser::new().for_reader(file.as_ref()) {
58/// let triple = triple.expect("triple should be valid");
59/// if matches!(triple.predicate(), oxirs_core::Predicate::NamedNode(n) if n == &rdf_type) && matches!(triple.object(), oxirs_core::Object::NamedNode(n) if n == &schema_person) {
60/// count += 1;
61/// }
62/// }
63/// assert_eq!(2, count);
64/// ```
65#[derive(Default, Clone)]
66#[must_use]
67pub struct RdfXmlParser {
68 pub(super) lenient: bool,
69 pub(super) base: Option<Iri<String>>,
70}
71
72impl RdfXmlParser {
73 /// Builds a new [`RdfXmlParser`].
74 #[inline]
75 pub fn new() -> Self {
76 Self::default()
77 }
78
79 /// Assumes the file is valid to make parsing faster.
80 ///
81 /// It will skip some validations.
82 ///
83 /// Note that if the file is actually not valid, the parser might emit broken RDF.
84 #[inline]
85 pub fn lenient(mut self) -> Self {
86 self.lenient = true;
87 self
88 }
89
90 #[deprecated(note = "Use `lenient()` instead", since = "0.2.0")]
91 #[inline]
92 pub fn unchecked(self) -> Self {
93 self.lenient()
94 }
95
96 #[inline]
97 pub fn with_base_iri(mut self, base_iri: impl Into<String>) -> Result<Self, IriParseError> {
98 self.base = Some(Iri::parse(base_iri.into())?);
99 Ok(self)
100 }
101
102 /// Parses a RDF/XML file from a [`Read`] implementation.
103 ///
104 /// Count the number of people:
105 /// ```
106 /// use oxirs_core::model::NamedNode;
107 /// use oxirs_core::{Predicate, Object};
108 /// use oxirs_core::rdfxml::RdfXmlParser;
109 ///
110 /// let file = br#"<?xml version="1.0"?>
111 /// <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:schema="http://schema.org/">
112 /// <rdf:Description rdf:about="http://example.com/foo">
113 /// <rdf:type rdf:resource="http://schema.org/Person" />
114 /// <schema:name>Foo</schema:name>
115 /// </rdf:Description>
116 /// <schema:Person rdf:about="http://example.com/bar" schema:name="Bar" />
117 /// </rdf:RDF>"#;
118 ///
119 /// let schema_person = NamedNode::new("http://schema.org/Person").expect("valid IRI");
120 /// let rdf_type = NamedNode::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type").expect("valid IRI");
121 /// let mut count = 0;
122 /// for triple in RdfXmlParser::new().for_reader(file.as_ref()) {
123 /// let triple = triple.expect("triple should be valid");
124 /// if matches!(triple.predicate(), oxirs_core::Predicate::NamedNode(n) if n == &rdf_type) && matches!(triple.object(), oxirs_core::Object::NamedNode(n) if n == &schema_person) {
125 /// count += 1;
126 /// }
127 /// }
128 /// assert_eq!(2, count);
129 /// ```
130 pub fn for_reader<R: Read>(self, reader: R) -> ReaderRdfXmlParser<R> {
131 ReaderRdfXmlParser {
132 results: Vec::new(),
133 parser: self.into_internal(BufReader::new(reader)),
134 reader_buffer: Vec::default(),
135 }
136 }
137
138 /// Parses a RDF/XML file from a [`AsyncRead`] implementation.
139 ///
140 /// Count the number of people:
141 /// ```
142 /// # #[tokio::main(flavor = "current_thread")]
143 /// # async fn main() -> Result<(), Box<dyn std::error::Error>> {
144 /// use oxirs_core::model::NamedNode;
145 /// use oxirs_core::{Predicate, Object};
146 /// use oxirs_core::rdfxml::RdfXmlParser;
147 ///
148 /// let file = br#"<?xml version="1.0"?>
149 /// <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:schema="http://schema.org/">
150 /// <rdf:Description rdf:about="http://example.com/foo">
151 /// <rdf:type rdf:resource="http://schema.org/Person" />
152 /// <schema:name>Foo</schema:name>
153 /// </rdf:Description>
154 /// <schema:Person rdf:about="http://example.com/bar" schema:name="Bar" />
155 /// </rdf:RDF>"#;
156 ///
157 /// let schema_person = NamedNode::new("http://schema.org/Person").expect("valid IRI");
158 /// let rdf_type = NamedNode::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type").expect("valid IRI");
159 /// let mut count = 0;
160 /// let mut parser = RdfXmlParser::new().for_tokio_async_reader(file.as_ref());
161 /// while let Some(triple) = parser.next().await {
162 /// let triple = triple.expect("triple should be valid");
163 /// if matches!(triple.predicate(), oxirs_core::Predicate::NamedNode(n) if n == &rdf_type) && matches!(triple.object(), oxirs_core::Object::NamedNode(n) if n == &schema_person) {
164 /// count += 1;
165 /// }
166 /// }
167 /// assert_eq!(2, count);
168 /// # Ok(())
169 /// # }
170 /// ```
171 #[cfg(feature = "async-tokio")]
172 pub fn for_tokio_async_reader<R: AsyncRead + Unpin>(
173 self,
174 reader: R,
175 ) -> TokioAsyncReaderRdfXmlParser<R> {
176 TokioAsyncReaderRdfXmlParser {
177 results: Vec::new(),
178 parser: self.into_internal(AsyncBufReader::new(reader)),
179 reader_buffer: Vec::default(),
180 }
181 }
182
183 /// Parses a RDF/XML file from a byte slice.
184 ///
185 /// Count the number of people:
186 /// ```
187 /// use oxirs_core::model::NamedNode;
188 /// use oxirs_core::{Predicate, Object};
189 /// use oxirs_core::rdfxml::RdfXmlParser;
190 ///
191 /// let file = br#"<?xml version="1.0"?>
192 /// <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:schema="http://schema.org/">
193 /// <rdf:Description rdf:about="http://example.com/foo">
194 /// <rdf:type rdf:resource="http://schema.org/Person" />
195 /// <schema:name>Foo</schema:name>
196 /// </rdf:Description>
197 /// <schema:Person rdf:about="http://example.com/bar" schema:name="Bar" />
198 /// </rdf:RDF>"#;
199 ///
200 /// let schema_person = NamedNode::new("http://schema.org/Person").expect("valid IRI");
201 /// let rdf_type = NamedNode::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type").expect("valid IRI");
202 /// let mut count = 0;
203 /// for triple in RdfXmlParser::new().for_slice(file) {
204 /// let triple = triple.expect("triple should be valid");
205 /// if matches!(triple.predicate(), oxirs_core::Predicate::NamedNode(n) if n == &rdf_type) && matches!(triple.object(), oxirs_core::Object::NamedNode(n) if n == &schema_person) {
206 /// count += 1;
207 /// }
208 /// }
209 /// assert_eq!(2, count);
210 /// ```
211 pub fn for_slice(self, slice: &[u8]) -> SliceRdfXmlParser<'_> {
212 SliceRdfXmlParser {
213 results: Vec::new(),
214 parser: self.into_internal(slice),
215 reader_buffer: Vec::default(),
216 }
217 }
218
219 pub(super) fn into_internal<T>(self, reader: T) -> InternalRdfXmlParser<T> {
220 use crate::rdfxml::parser_types::RdfXmlState;
221 let mut reader = NsReader::from_reader(reader);
222 reader.config_mut().expand_empty_elements = true;
223 InternalRdfXmlParser {
224 reader,
225 state: vec![RdfXmlState::Doc {
226 base_iri: self.base.clone(),
227 }],
228 custom_entities: HashMap::new(),
229 in_literal_depth: 0,
230 known_rdf_id: HashSet::default(),
231 is_end: false,
232 lenient: self.lenient,
233 }
234 }
235}
236
237/// Parses a RDF/XML file from a [`Read`] implementation.
238///
239/// Can be built using [`RdfXmlParser::for_reader`].
240///
241/// Count the number of people:
242/// ```
243/// use oxirs_core::model::NamedNode;
244/// use oxirs_core::{Predicate, Object};
245/// use oxirs_core::rdfxml::RdfXmlParser;
246///
247/// let file = br#"<?xml version="1.0"?>
248/// <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:schema="http://schema.org/">
249/// <rdf:Description rdf:about="http://example.com/foo">
250/// <rdf:type rdf:resource="http://schema.org/Person" />
251/// <schema:name>Foo</schema:name>
252/// </rdf:Description>
253/// <schema:Person rdf:about="http://example.com/bar" schema:name="Bar" />
254/// </rdf:RDF>"#;
255///
256/// let schema_person = NamedNode::new("http://schema.org/Person").expect("valid IRI");
257/// let rdf_type = NamedNode::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type").expect("valid IRI");
258/// let mut count = 0;
259/// for triple in RdfXmlParser::new().for_reader(file.as_ref()) {
260/// let triple = triple.expect("triple should be valid");
261/// if matches!(triple.predicate(), oxirs_core::Predicate::NamedNode(n) if n == &rdf_type) && matches!(triple.object(), oxirs_core::Object::NamedNode(n) if n == &schema_person) {
262/// count += 1;
263/// }
264/// }
265/// assert_eq!(2, count);
266/// ```
267#[must_use]
268pub struct ReaderRdfXmlParser<R: Read> {
269 results: Vec<Triple>,
270 parser: InternalRdfXmlParser<BufReader<R>>,
271 reader_buffer: Vec<u8>,
272}
273
274impl<R: Read> Iterator for ReaderRdfXmlParser<R> {
275 type Item = Result<Triple, RdfXmlParseError>;
276
277 fn next(&mut self) -> Option<Self::Item> {
278 loop {
279 if let Some(triple) = self.results.pop() {
280 return Some(Ok(triple));
281 } else if self.parser.is_end {
282 return None;
283 }
284 if let Err(e) = self.parse_step() {
285 return Some(Err(e));
286 }
287 }
288 }
289}
290
291impl<R: Read> ReaderRdfXmlParser<R> {
292 /// The list of IRI prefixes considered at the current step of the parsing.
293 ///
294 /// This method returns (prefix name, prefix value) tuples.
295 /// It is empty at the beginning of the parsing and gets updated when prefixes are encountered.
296 /// It should be full at the end of the parsing (but if a prefix is overridden, only the latest version will be returned).
297 ///
298 /// ```
299 /// use oxirs_core::rdfxml::RdfXmlParser;
300 ///
301 /// let file = br#"<?xml version="1.0"?>
302 /// <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:schema="http://schema.org/">
303 /// <rdf:Description rdf:about="http://example.com/foo">
304 /// <rdf:type rdf:resource="http://schema.org/Person" />
305 /// <schema:name>Foo</schema:name>
306 /// </rdf:Description>
307 /// <schema:Person rdf:about="http://example.com/bar" schema:name="Bar" />
308 /// </rdf:RDF>"#;
309 ///
310 /// let mut parser = RdfXmlParser::new().for_reader(file.as_ref());
311 /// assert_eq!(parser.prefixes().collect::<Vec<_>>(), []); // No prefix at the beginning
312 ///
313 /// parser.next().expect("should have next item").expect("operation should succeed"); // We read the first triple
314 /// assert_eq!(
315 /// parser.prefixes().collect::<Vec<_>>(),
316 /// [
317 /// ("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#"),
318 /// ("schema", "http://schema.org/")
319 /// ]
320 /// ); // There are now prefixes
321 /// ```
322 pub fn prefixes(&self) -> RdfXmlPrefixesIter<'_> {
323 RdfXmlPrefixesIter {
324 inner: self.parser.reader.resolver().bindings(),
325 decoder: self.parser.reader.decoder(),
326 lenient: self.parser.lenient,
327 }
328 }
329
330 /// The base IRI considered at the current step of the parsing.
331 ///
332 /// ```
333 /// use oxirs_core::rdfxml::RdfXmlParser;
334 ///
335 /// let file = br#"<?xml version="1.0"?>
336 /// <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xml:base="http://example.com/">
337 /// <rdf:Description rdf:about="foo">
338 /// <rdf:type rdf:resource="http://schema.org/Person" />
339 /// </rdf:Description>
340 /// </rdf:RDF>"#;
341 ///
342 /// let mut parser = RdfXmlParser::new().for_reader(file.as_ref());
343 /// assert!(parser.base_iri().is_none()); // No base at the beginning because none has been given to the parser.
344 ///
345 /// parser.next().expect("should have next item").expect("operation should succeed"); // We read the first triple
346 /// assert_eq!(parser.base_iri(), Some("http://example.com/")); // There is now a base IRI.
347 /// ```
348 pub fn base_iri(&self) -> Option<&str> {
349 Some(self.parser.current_base_iri()?.as_str())
350 }
351
352 /// The current byte position in the input data.
353 pub fn buffer_position(&self) -> u64 {
354 self.parser.reader.buffer_position()
355 }
356
357 fn parse_step(&mut self) -> Result<(), RdfXmlParseError> {
358 self.reader_buffer.clear();
359 let event = self
360 .parser
361 .reader
362 .read_event_into(&mut self.reader_buffer)?;
363 self.parser.parse_event(event, &mut self.results)
364 }
365}
366
/// RDF/XML parser over a Tokio [`AsyncRead`] implementation.
///
/// Build it with [`RdfXmlParser::for_tokio_async_reader`].
///
/// Count the number of people:
/// ```
/// # #[tokio::main(flavor = "current_thread")]
/// # async fn main() -> Result<(), Box<dyn std::error::Error>> {
/// use oxirs_core::model::NamedNode;
/// use oxirs_core::{Predicate, Object};
/// use oxirs_core::rdfxml::RdfXmlParser;
///
/// let file = br#"<?xml version="1.0"?>
/// <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:schema="http://schema.org/">
///   <rdf:Description rdf:about="http://example.com/foo">
///     <rdf:type rdf:resource="http://schema.org/Person" />
///     <schema:name>Foo</schema:name>
///   </rdf:Description>
///   <schema:Person rdf:about="http://example.com/bar" schema:name="Bar" />
/// </rdf:RDF>"#;
///
/// let schema_person = NamedNode::new("http://schema.org/Person").expect("valid IRI");
/// let rdf_type = NamedNode::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type").expect("valid IRI");
/// let mut count = 0;
/// let mut parser = RdfXmlParser::new().for_tokio_async_reader(file.as_ref());
/// while let Some(triple) = parser.next().await {
///     let triple = triple.expect("triple should be valid");
///     if matches!(triple.predicate(), Predicate::NamedNode(n) if n == &rdf_type)
///         && matches!(triple.object(), Object::NamedNode(n) if n == &schema_person)
///     {
///         count += 1;
///     }
/// }
/// assert_eq!(2, count);
/// # Ok(())
/// # }
/// ```
#[cfg(feature = "async-tokio")]
#[must_use]
pub struct TokioAsyncReaderRdfXmlParser<R>
where
    R: AsyncRead + Unpin,
{
    /// Triples already produced by the internal parser but not yet returned.
    results: Vec<Triple>,
    /// Internal parser state wrapping the buffered async reader.
    parser: InternalRdfXmlParser<AsyncBufReader<R>>,
    /// Scratch buffer passed to the XML reader for each event.
    reader_buffer: Vec<u8>,
}
409
#[cfg(feature = "async-tokio")]
impl<R: AsyncRead + Unpin> TokioAsyncReaderRdfXmlParser<R> {
    /// Reads the next triple or returns `None` if the file is finished.
    ///
    /// Triples already buffered from a previous step are returned first;
    /// otherwise XML events are read until a triple is produced, an error
    /// occurs, or the internal parser reports the end of the input.
    pub async fn next(&mut self) -> Option<Result<Triple, RdfXmlParseError>> {
        loop {
            if let Some(triple) = self.results.pop() {
                return Some(Ok(triple));
            } else if self.parser.is_end {
                return None;
            }
            if let Err(e) = self.parse_step().await {
                return Some(Err(e));
            }
        }
    }

    /// The list of IRI prefixes considered at the current step of the parsing.
    ///
    /// This method returns (prefix name, prefix value) tuples.
    /// It is empty at the beginning of the parsing and gets updated when prefixes are encountered.
    /// It should be full at the end of the parsing (but if a prefix is overridden, only the latest version will be returned).
    ///
    /// ```
    /// # #[tokio::main(flavor = "current_thread")]
    /// # async fn main() -> Result<(), Box<dyn std::error::Error>> {
    /// use oxirs_core::rdfxml::RdfXmlParser;
    ///
    /// let file = br#"<?xml version="1.0"?>
    /// <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:schema="http://schema.org/">
    ///   <rdf:Description rdf:about="http://example.com/foo">
    ///     <rdf:type rdf:resource="http://schema.org/Person" />
    ///     <schema:name>Foo</schema:name>
    ///   </rdf:Description>
    ///   <schema:Person rdf:about="http://example.com/bar" schema:name="Bar" />
    /// </rdf:RDF>"#;
    ///
    /// let mut parser = RdfXmlParser::new().for_tokio_async_reader(file.as_ref());
    /// assert_eq!(parser.prefixes().collect::<Vec<_>>(), []); // No prefix at the beginning
    ///
    /// parser.next().await.expect("async operation should succeed").expect("operation should succeed"); // We read the first triple
    /// assert_eq!(
    ///     parser.prefixes().collect::<Vec<_>>(),
    ///     [
    ///         ("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#"),
    ///         ("schema", "http://schema.org/")
    ///     ]
    /// ); // There are now prefixes
    /// # Ok(())
    /// # }
    /// ```
    pub fn prefixes(&self) -> RdfXmlPrefixesIter<'_> {
        RdfXmlPrefixesIter {
            inner: self.parser.reader.resolver().bindings(),
            decoder: self.parser.reader.decoder(),
            lenient: self.parser.lenient,
        }
    }

    /// The base IRI considered at the current step of the parsing.
    ///
    /// ```
    /// # #[tokio::main(flavor = "current_thread")]
    /// # async fn main() -> Result<(), Box<dyn std::error::Error>> {
    /// use oxirs_core::rdfxml::RdfXmlParser;
    ///
    /// let file = br#"<?xml version="1.0"?>
    /// <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xml:base="http://example.com/">
    ///   <rdf:Description rdf:about="foo">
    ///     <rdf:type rdf:resource="http://schema.org/Person" />
    ///   </rdf:Description>
    /// </rdf:RDF>"#;
    ///
    /// let mut parser = RdfXmlParser::new().for_tokio_async_reader(file.as_ref());
    /// assert!(parser.base_iri().is_none()); // No base at the beginning because none has been given to the parser.
    ///
    /// parser.next().await.expect("async operation should succeed").expect("operation should succeed"); // We read the first triple
    /// assert_eq!(parser.base_iri(), Some("http://example.com/")); // There is now a base IRI.
    /// # Ok(())
    /// # }
    /// ```
    pub fn base_iri(&self) -> Option<&str> {
        Some(self.parser.current_base_iri()?.as_str())
    }

    /// The current byte position in the input data.
    pub fn buffer_position(&self) -> u64 {
        self.parser.reader.buffer_position()
    }

    /// Reads one XML event asynchronously and lets the internal parser process
    /// it, pushing any resulting triples into `self.results`.
    async fn parse_step(&mut self) -> Result<(), RdfXmlParseError> {
        // The scratch buffer is reused for every event, so reset it first.
        self.reader_buffer.clear();
        let event = self
            .parser
            .reader
            .read_event_into_async(&mut self.reader_buffer)
            .await?;
        self.parser.parse_event(event, &mut self.results)
    }
}
510
511/// Parses a RDF/XML file from a byte slice.
512///
513/// Can be built using [`RdfXmlParser::for_slice`].
514///
515/// Count the number of people:
516/// ```
517/// use oxirs_core::model::NamedNode;
518/// use oxirs_core::{Predicate, Object};
519/// use oxirs_core::rdfxml::RdfXmlParser;
520///
521/// let file = br#"<?xml version="1.0"?>
522/// <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:schema="http://schema.org/">
523/// <rdf:Description rdf:about="http://example.com/foo">
524/// <rdf:type rdf:resource="http://schema.org/Person" />
525/// <schema:name>Foo</schema:name>
526/// </rdf:Description>
527/// <schema:Person rdf:about="http://example.com/bar" schema:name="Bar" />
528/// </rdf:RDF>"#;
529///
530/// let schema_person = NamedNode::new("http://schema.org/Person").expect("valid IRI");
531/// let rdf_type = NamedNode::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type").expect("valid IRI");
532/// let mut count = 0;
533/// for triple in RdfXmlParser::new().for_slice(file) {
534/// let triple = triple.expect("triple should be valid");
535/// if matches!(triple.predicate(), oxirs_core::Predicate::NamedNode(n) if n == &rdf_type) && matches!(triple.object(), oxirs_core::Object::NamedNode(n) if n == &schema_person) {
536/// count += 1;
537/// }
538/// }
539/// assert_eq!(2, count);
540/// ```
541#[must_use]
542pub struct SliceRdfXmlParser<'a> {
543 results: Vec<Triple>,
544 parser: InternalRdfXmlParser<&'a [u8]>,
545 reader_buffer: Vec<u8>,
546}
547
548impl Iterator for SliceRdfXmlParser<'_> {
549 type Item = Result<Triple, RdfXmlSyntaxError>;
550
551 fn next(&mut self) -> Option<Self::Item> {
552 loop {
553 if let Some(triple) = self.results.pop() {
554 return Some(Ok(triple));
555 } else if self.parser.is_end {
556 return None;
557 }
558 if let Err(RdfXmlParseError::Syntax(e)) = self.parse_step() {
559 // I/O errors can't happen
560 return Some(Err(e));
561 }
562 }
563 }
564}
565
566impl SliceRdfXmlParser<'_> {
567 /// The list of IRI prefixes considered at the current step of the parsing.
568 ///
569 /// This method returns (prefix name, prefix value) tuples.
570 /// It is empty at the beginning of the parsing and gets updated when prefixes are encountered.
571 /// It should be full at the end of the parsing (but if a prefix is overridden, only the latest version will be returned).
572 ///
573 /// ```
574 /// use oxirs_core::rdfxml::RdfXmlParser;
575 ///
576 /// let file = br#"<?xml version="1.0"?>
577 /// <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:schema="http://schema.org/">
578 /// <rdf:Description rdf:about="http://example.com/foo">
579 /// <rdf:type rdf:resource="http://schema.org/Person" />
580 /// <schema:name>Foo</schema:name>
581 /// </rdf:Description>
582 /// <schema:Person rdf:about="http://example.com/bar" schema:name="Bar" />
583 /// </rdf:RDF>"#;
584 ///
585 /// let mut parser = RdfXmlParser::new().for_slice(file);
586 /// assert_eq!(parser.prefixes().collect::<Vec<_>>(), []); // No prefix at the beginning
587 ///
588 /// parser.next().expect("should have next item").expect("operation should succeed"); // We read the first triple
589 /// assert_eq!(
590 /// parser.prefixes().collect::<Vec<_>>(),
591 /// [
592 /// ("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#"),
593 /// ("schema", "http://schema.org/")
594 /// ]
595 /// ); // There are now prefixes
596 /// ```
597 pub fn prefixes(&self) -> RdfXmlPrefixesIter<'_> {
598 RdfXmlPrefixesIter {
599 inner: self.parser.reader.resolver().bindings(),
600 decoder: self.parser.reader.decoder(),
601 lenient: self.parser.lenient,
602 }
603 }
604
605 /// The base IRI considered at the current step of the parsing.
606 ///
607 /// ```
608 /// use oxirs_core::rdfxml::RdfXmlParser;
609 ///
610 /// let file = br#"<?xml version="1.0"?>
611 /// <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xml:base="http://example.com/">
612 /// <rdf:Description rdf:about="foo">
613 /// <rdf:type rdf:resource="http://schema.org/Person" />
614 /// </rdf:Description>
615 /// </rdf:RDF>"#;
616 ///
617 /// let mut parser = RdfXmlParser::new().for_slice(file);
618 /// assert!(parser.base_iri().is_none()); // No base at the beginning because none has been given to the parser.
619 ///
620 /// parser.next().expect("should have next item").expect("operation should succeed"); // We read the first triple
621 /// assert_eq!(parser.base_iri(), Some("http://example.com/")); // There is now a base IRI.
622 /// ```
623 pub fn base_iri(&self) -> Option<&str> {
624 Some(self.parser.current_base_iri()?.as_str())
625 }
626
627 /// The current byte position in the input data.
628 pub fn buffer_position(&self) -> u64 {
629 self.parser.reader.buffer_position()
630 }
631
632 fn parse_step(&mut self) -> Result<(), RdfXmlParseError> {
633 self.reader_buffer.clear();
634 let event = self
635 .parser
636 .reader
637 .read_event_into(&mut self.reader_buffer)?;
638 self.parser.parse_event(event, &mut self.results)
639 }
640}
641
642/// Iterator on the file prefixes.
643///
644/// See [`ReaderRdfXmlParser::prefixes`].
645pub struct RdfXmlPrefixesIter<'a> {
646 inner: NamespaceBindingsIter<'a>,
647 decoder: Decoder,
648 lenient: bool,
649}
650
651impl<'a> Iterator for RdfXmlPrefixesIter<'a> {
652 type Item = (&'a str, &'a str);
653
654 #[inline]
655 fn next(&mut self) -> Option<Self::Item> {
656 loop {
657 let (key, value) = self.inner.next()?;
658 return Some((
659 match key {
660 PrefixDeclaration::Default => "",
661 PrefixDeclaration::Named(name) => {
662 let Ok(Cow::Borrowed(name)) = self.decoder.decode(name) else {
663 continue;
664 };
665 let Ok(Cow::Borrowed(name)) = unescape_with(name, |_| None) else {
666 continue;
667 };
668 if !self.lenient && !is_nc_name(name) {
669 continue; // We don't return invalid prefixes
670 }
671 name
672 }
673 },
674 {
675 let Ok(Cow::Borrowed(value)) = self.decoder.decode(value.0) else {
676 continue;
677 };
678 let Ok(Cow::Borrowed(value)) = unescape_with(value, |_| None) else {
679 continue;
680 };
681 if !self.lenient && Iri::parse(value).is_err() {
682 continue; // We don't return invalid prefixes
683 }
684 value
685 },
686 ));
687 }
688 }
689
690 #[inline]
691 fn size_hint(&self) -> (usize, Option<usize>) {
692 self.inner.size_hint()
693 }
694}