use crate::model::{NamedOrBlankNode, Term, Triple};
use crate::rdfxml::error::{RdfXmlParseError, RdfXmlSyntaxError};
use crate::rdfxml::parser_types::InternalRdfXmlParser;
use oxiri::{Iri, IriParseError};
use quick_xml::escape::unescape_with;
use quick_xml::name::{NamespaceBindingsIter, PrefixDeclaration};
use quick_xml::{Decoder, NsReader};
use std::borrow::Cow;
use std::collections::{HashMap, HashSet};
use std::io::{BufReader, Read};
#[cfg(feature = "async-tokio")]
use tokio::io::{AsyncRead, BufReader as AsyncBufReader};
use crate::rdfxml::utils::is_nc_name;
/// Widens a subject-position node into a general RDF term.
impl From<NamedOrBlankNode> for Term {
    /// Maps each `NamedOrBlankNode` variant onto the `Term` variant of the same name,
    /// moving the inner node without copying it.
    fn from(node: NamedOrBlankNode) -> Self {
        match node {
            NamedOrBlankNode::BlankNode(blank) => Self::BlankNode(blank),
            NamedOrBlankNode::NamedNode(named) => Self::NamedNode(named),
        }
    }
}
/// Builder-style configuration for parsing RDF/XML.
///
/// The derived `Default` yields a non-lenient parser without a base IRI. The value is
/// consumed by the `for_*` constructors in its `impl` block, which turn it into a
/// concrete parser over a reader or a byte slice.
#[derive(Default, Clone)]
#[must_use]
pub struct RdfXmlParser {
/// Lenient-mode flag, switched on by `lenient()` and forwarded to the internal parser.
pub(super) lenient: bool,
/// Optional base IRI, set by `with_base_iri()`; used to seed the initial document state.
pub(super) base: Option<Iri<String>>,
}
impl RdfXmlParser {
    /// Builds a parser with the default configuration: not lenient and without a base IRI.
    #[inline]
    pub fn new() -> Self {
        Self::default()
    }

    /// Switches the parser to lenient mode.
    ///
    /// The flag is forwarded to the internal parser. Within this file it also makes the
    /// prefix iterators skip their NCName and IRI validity checks.
    #[inline]
    pub fn lenient(mut self) -> Self {
        self.lenient = true;
        self
    }

    /// Deprecated alias of [`RdfXmlParser::lenient`].
    #[deprecated(note = "Use `lenient()` instead", since = "0.2.0")]
    #[inline]
    pub fn unchecked(self) -> Self {
        self.lenient()
    }

    /// Sets the base IRI used to seed the initial document state.
    ///
    /// # Errors
    ///
    /// Returns the [`IriParseError`] produced by `Iri::parse` when `base_iri` is rejected.
    #[inline]
    pub fn with_base_iri(mut self, base_iri: impl Into<String>) -> Result<Self, IriParseError> {
        self.base = Some(Iri::parse(base_iri.into())?);
        Ok(self)
    }

    /// Creates an iterator-based parser over a [`Read`] implementation.
    ///
    /// The reader is wrapped in a [`BufReader`], so callers do not need to buffer it.
    pub fn for_reader<R: Read>(self, reader: R) -> ReaderRdfXmlParser<R> {
        ReaderRdfXmlParser {
            results: Vec::new(),
            parser: self.into_internal(BufReader::new(reader)),
            reader_buffer: Vec::default(),
        }
    }

    /// Creates a parser over a Tokio [`AsyncRead`] implementation.
    ///
    /// The reader is wrapped in Tokio's buffered reader before being handed to the XML reader.
    #[cfg(feature = "async-tokio")]
    pub fn for_tokio_async_reader<R: AsyncRead + Unpin>(
        self,
        reader: R,
    ) -> TokioAsyncReaderRdfXmlParser<R> {
        TokioAsyncReaderRdfXmlParser {
            results: Vec::new(),
            parser: self.into_internal(AsyncBufReader::new(reader)),
            reader_buffer: Vec::default(),
        }
    }

    /// Creates an iterator-based parser over an in-memory byte slice.
    pub fn for_slice(self, slice: &[u8]) -> SliceRdfXmlParser<'_> {
        SliceRdfXmlParser {
            results: Vec::new(),
            parser: self.into_internal(slice),
            reader_buffer: Vec::default(),
        }
    }

    /// Converts this configuration into the internal parser state machine reading from `reader`.
    ///
    /// The namespace-aware XML reader is configured to expand empty elements, and the state
    /// stack starts with a single `Doc` state carrying the configured base IRI.
    pub(super) fn into_internal<T>(self, reader: T) -> InternalRdfXmlParser<T> {
        use crate::rdfxml::parser_types::RdfXmlState;

        let mut reader = NsReader::from_reader(reader);
        // Report `<a/>` as a start event followed by an end event so that element handling
        // only has to deal with one shape.
        reader.config_mut().expand_empty_elements = true;
        InternalRdfXmlParser {
            reader,
            // `self` is consumed here, so the base IRI is moved rather than cloned.
            state: vec![RdfXmlState::Doc {
                base_iri: self.base,
            }],
            custom_entities: HashMap::new(),
            in_literal_depth: 0,
            known_rdf_id: HashSet::default(),
            is_end: false,
            lenient: self.lenient,
        }
    }
}
/// RDF/XML parser reading from a [`Read`] implementation.
///
/// Built by `RdfXmlParser::for_reader`; its `Iterator` implementation yields the parsed triples.
#[must_use]
pub struct ReaderRdfXmlParser<R: Read> {
/// Triples produced by parsing steps that have not been handed to the caller yet.
results: Vec<Triple>,
/// Internal parser state machine wrapping the buffered XML reader.
parser: InternalRdfXmlParser<BufReader<R>>,
/// Scratch buffer the XML reader fills for each event; cleared before every read.
reader_buffer: Vec<u8>,
}
impl<R: Read> Iterator for ReaderRdfXmlParser<R> {
    type Item = Result<Triple, RdfXmlParseError>;

    /// Returns the next parsed triple, the next parsing error, or `None` once the
    /// parser has flagged the end of input and no buffered triple is left.
    ///
    /// Buffered triples are taken from the back of the pending list.
    fn next(&mut self) -> Option<Self::Item> {
        // Only pull more XML events while there is nothing buffered to hand out.
        while self.results.is_empty() {
            if self.parser.is_end {
                return None;
            }
            if let Err(error) = self.parse_step() {
                return Some(Err(error));
            }
        }
        self.results.pop().map(Ok)
    }
}
impl<R: Read> ReaderRdfXmlParser<R> {
    /// Iterates over the `(prefix, namespace)` bindings currently held by the XML
    /// reader's namespace resolver.
    ///
    /// The iterator inherits this parser's leniency setting.
    pub fn prefixes(&self) -> RdfXmlPrefixesIter<'_> {
        let xml_reader = &self.parser.reader;
        RdfXmlPrefixesIter {
            inner: xml_reader.resolver().bindings(),
            decoder: xml_reader.decoder(),
            lenient: self.parser.lenient,
        }
    }

    /// Returns the base IRI the internal parser currently reports, if there is one.
    pub fn base_iri(&self) -> Option<&str> {
        self.parser.current_base_iri().map(|iri| iri.as_str())
    }

    /// Returns the buffer position reported by the underlying XML reader.
    pub fn buffer_position(&self) -> u64 {
        let xml_reader = &self.parser.reader;
        xml_reader.buffer_position()
    }

    /// Reads one XML event and feeds it to the internal parser, which appends any
    /// resulting triples to the pending list.
    fn parse_step(&mut self) -> Result<(), RdfXmlParseError> {
        let Self {
            results,
            parser,
            reader_buffer,
        } = self;
        // The scratch buffer only needs to live for a single event.
        reader_buffer.clear();
        let event = parser.reader.read_event_into(reader_buffer)?;
        parser.parse_event(event, results)
    }
}
/// RDF/XML parser reading from a Tokio [`AsyncRead`] implementation.
///
/// Built by `RdfXmlParser::for_tokio_async_reader`; triples are pulled with its async `next` method.
#[cfg(feature = "async-tokio")]
#[must_use]
pub struct TokioAsyncReaderRdfXmlParser<R: AsyncRead + Unpin> {
/// Triples produced by parsing steps that have not been handed to the caller yet.
results: Vec<Triple>,
/// Internal parser state machine wrapping the buffered async XML reader.
parser: InternalRdfXmlParser<AsyncBufReader<R>>,
/// Scratch buffer the XML reader fills for each event; cleared before every read.
reader_buffer: Vec<u8>,
}
#[cfg(feature = "async-tokio")]
impl<R: AsyncRead + Unpin> TokioAsyncReaderRdfXmlParser<R> {
    /// Returns the next parsed triple, the next parsing error, or `None` once the
    /// parser has flagged the end of input and no buffered triple is left.
    ///
    /// Buffered triples are taken from the back of the pending list.
    pub async fn next(&mut self) -> Option<Result<Triple, RdfXmlParseError>> {
        // Only pull more XML events while there is nothing buffered to hand out.
        while self.results.is_empty() {
            if self.parser.is_end {
                return None;
            }
            if let Err(error) = self.parse_step().await {
                return Some(Err(error));
            }
        }
        self.results.pop().map(Ok)
    }

    /// Iterates over the `(prefix, namespace)` bindings currently held by the XML
    /// reader's namespace resolver.
    ///
    /// The iterator inherits this parser's leniency setting.
    pub fn prefixes(&self) -> RdfXmlPrefixesIter<'_> {
        let xml_reader = &self.parser.reader;
        RdfXmlPrefixesIter {
            inner: xml_reader.resolver().bindings(),
            decoder: xml_reader.decoder(),
            lenient: self.parser.lenient,
        }
    }

    /// Returns the base IRI the internal parser currently reports, if there is one.
    pub fn base_iri(&self) -> Option<&str> {
        self.parser.current_base_iri().map(|iri| iri.as_str())
    }

    /// Returns the buffer position reported by the underlying XML reader.
    pub fn buffer_position(&self) -> u64 {
        let xml_reader = &self.parser.reader;
        xml_reader.buffer_position()
    }

    /// Asynchronously reads one XML event and feeds it to the internal parser, which
    /// appends any resulting triples to the pending list.
    async fn parse_step(&mut self) -> Result<(), RdfXmlParseError> {
        let Self {
            results,
            parser,
            reader_buffer,
        } = self;
        // The scratch buffer only needs to live for a single event.
        reader_buffer.clear();
        let event = parser.reader.read_event_into_async(reader_buffer).await?;
        parser.parse_event(event, results)
    }
}
/// RDF/XML parser reading from an in-memory byte slice.
///
/// Built by `RdfXmlParser::for_slice`; its `Iterator` implementation yields the parsed triples.
#[must_use]
pub struct SliceRdfXmlParser<'a> {
/// Triples produced by parsing steps that have not been handed to the caller yet.
results: Vec<Triple>,
/// Internal parser state machine reading directly from the borrowed slice.
parser: InternalRdfXmlParser<&'a [u8]>,
/// Scratch buffer the XML reader fills for each event; cleared before every read.
reader_buffer: Vec<u8>,
}
impl Iterator for SliceRdfXmlParser<'_> {
type Item = Result<Triple, RdfXmlSyntaxError>;
fn next(&mut self) -> Option<Self::Item> {
loop {
if let Some(triple) = self.results.pop() {
return Some(Ok(triple));
} else if self.parser.is_end {
return None;
}
if let Err(RdfXmlParseError::Syntax(e)) = self.parse_step() {
return Some(Err(e));
}
}
}
}
impl SliceRdfXmlParser<'_> {
    /// Iterates over the `(prefix, namespace)` bindings currently held by the XML
    /// reader's namespace resolver.
    ///
    /// The iterator inherits this parser's leniency setting.
    pub fn prefixes(&self) -> RdfXmlPrefixesIter<'_> {
        let xml_reader = &self.parser.reader;
        RdfXmlPrefixesIter {
            inner: xml_reader.resolver().bindings(),
            decoder: xml_reader.decoder(),
            lenient: self.parser.lenient,
        }
    }

    /// Returns the base IRI the internal parser currently reports, if there is one.
    pub fn base_iri(&self) -> Option<&str> {
        self.parser.current_base_iri().map(|iri| iri.as_str())
    }

    /// Returns the buffer position reported by the underlying XML reader.
    pub fn buffer_position(&self) -> u64 {
        let xml_reader = &self.parser.reader;
        xml_reader.buffer_position()
    }

    /// Reads one XML event and feeds it to the internal parser, which appends any
    /// resulting triples to the pending list.
    fn parse_step(&mut self) -> Result<(), RdfXmlParseError> {
        let Self {
            results,
            parser,
            reader_buffer,
        } = self;
        // The scratch buffer only needs to live for a single event.
        reader_buffer.clear();
        let event = parser.reader.read_event_into(reader_buffer)?;
        parser.parse_event(event, results)
    }
}
/// Iterator over `(prefix, namespace)` string pairs taken from an XML reader's namespace bindings.
///
/// Returned by the `prefixes()` method of the parser types in this file.
pub struct RdfXmlPrefixesIter<'a> {
/// Raw namespace bindings supplied by the XML reader's resolver.
inner: NamespaceBindingsIter<'a>,
/// Decoder used to turn the raw prefix and namespace bytes into text.
decoder: Decoder,
/// When `true`, the NCName and IRI validity checks on yielded bindings are skipped.
lenient: bool,
}
impl<'a> Iterator for RdfXmlPrefixesIter<'a> {
    type Item = (&'a str, &'a str);

    /// Returns the next usable `(prefix, namespace)` binding.
    ///
    /// The default namespace is reported with an empty prefix. A binding is skipped when
    /// its prefix or namespace cannot be decoded and unescaped without allocating, or,
    /// unless the iterator is lenient, when the prefix is not an NCName or the namespace
    /// does not parse as an IRI.
    #[inline]
    fn next(&mut self) -> Option<Self::Item> {
        loop {
            let (key, value) = self.inner.next()?;
            let prefix = match key {
                PrefixDeclaration::Default => "",
                PrefixDeclaration::Named(raw) => {
                    let Some(name) = self.borrowed_text(raw) else {
                        continue;
                    };
                    if !self.lenient && !is_nc_name(name) {
                        continue; // Invalid prefixes are not returned
                    }
                    name
                }
            };
            let Some(namespace) = self.borrowed_text(value.0) else {
                continue;
            };
            if !self.lenient && Iri::parse(namespace).is_err() {
                continue; // Invalid namespace IRIs are not returned
            }
            return Some((prefix, namespace));
        }
    }

    /// Returns `(0, upper)` where `upper` is the inner iterator's upper bound.
    ///
    /// `next()` may skip any number of bindings, so the inner lower bound cannot be
    /// forwarded: only the upper bound still holds after filtering.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        let (_, upper) = self.inner.size_hint();
        (0, upper)
    }
}

impl RdfXmlPrefixesIter<'_> {
    /// Decodes and unescapes `raw`, returning the text only when both steps could
    /// borrow from the input (no allocation and no error).
    fn borrowed_text<'b>(&self, raw: &'b [u8]) -> Option<&'b str> {
        let Ok(Cow::Borrowed(decoded)) = self.decoder.decode(raw) else {
            return None;
        };
        // No custom entity is resolved here: the resolver callback always answers `None`.
        let Ok(Cow::Borrowed(unescaped)) = unescape_with(decoded, |_| None) else {
            return None;
        };
        Some(unescaped)
    }
}