use crate::error::*;
use crate::shared::*;
use crate::triple_allocator::TripleAllocator;
use crate::utils::*;
use rio_api::model::*;
use rio_api::parser::*;
use std::io::BufRead;
/// Line-oriented N-Triples parser reading from a [`BufRead`] source.
///
/// Create one with `NTriplesParser::new` and drive it through the
/// `TriplesParser` trait implemented below.
pub struct NTriplesParser<R>
where
    R: BufRead,
{
    /// Look-ahead byte reader wrapped around the input.
    read: LookAheadByteReader<R>,
    /// Storage for the triple currently being assembled.
    triple_alloc: TripleAllocator,
}
impl<R> NTriplesParser<R>
where
    R: BufRead,
{
    /// Builds a parser that takes its input from `reader`.
    pub fn new(reader: R) -> Self {
        let read = LookAheadByteReader::new(reader);
        let triple_alloc = TripleAllocator::new();
        Self { read, triple_alloc }
    }
}
/// Step-wise triple parsing: each step handles one input line.
impl<R: BufRead> TriplesParser for NTriplesParser<R> {
    type Error = TurtleError;

    /// Parses the next line and, if it holds a triple, passes it to `on_triple`.
    ///
    /// Lines for which `parse_triple_line` reports no triple (it returns
    /// `Ok(false)`) are consumed without invoking the callback.
    ///
    /// # Errors
    ///
    /// * An error returned by `on_triple` is passed through unchanged.
    /// * A parse error is converted with `E::from` after the reader has been
    ///   asked to move past the end of the current line, so the next call
    ///   starts on a new line.
    /// * If moving past the line end fails, that failure is returned instead
    ///   of the parse error.
    ///
    /// In every error case the triple allocator is emptied first, so no
    /// partially built triple survives into the next call.
    fn parse_step<E: From<TurtleError>>(
        &mut self,
        on_triple: &mut impl FnMut(Triple<'_>) -> Result<(), E>,
    ) -> Result<(), E> {
        match parse_triple_line(&mut self.read, &mut self.triple_alloc) {
            Ok(true) => match on_triple(*self.triple_alloc.top()) {
                Ok(()) => {
                    self.triple_alloc.pop_top_triple();
                    // A finished line must leave nothing behind in the allocator.
                    debug_assert_eq!(self.triple_alloc.complete_len(), 0);
                    debug_assert_eq!(self.triple_alloc.incomplete_len(), 0);
                    Ok(())
                }
                Err(err) => {
                    self.triple_alloc.clear();
                    Err(err)
                }
            },
            Ok(false) => Ok(()),
            Err(error) => {
                // Reset the allocator before touching the reader again: the
                // `?` below may return early, and a half-built triple must
                // not leak into the following step.
                self.triple_alloc.clear();
                self.read.consume_line_end()?;
                Err(E::from(error))
            }
        }
    }

    /// Returns `true` once the reader has no current byte left.
    fn is_end(&self) -> bool {
        self.read.current().is_none()
    }
}
/// Line-oriented N-Quads parser reading from a [`BufRead`] source.
///
/// Create one with `NQuadsParser::new` and drive it through the
/// `QuadsParser` trait implemented below.
pub struct NQuadsParser<R>
where
    R: BufRead,
{
    /// Look-ahead byte reader wrapped around the input.
    read: LookAheadByteReader<R>,
    /// Storage for the triple part of the quad currently being assembled.
    triple_alloc: TripleAllocator,
    /// Reusable buffer that receives the text of the graph name, when a line has one.
    graph_name_buf: String,
}
impl<R> NQuadsParser<R>
where
    R: BufRead,
{
    /// Builds a parser that takes its input from `reader`.
    pub fn new(reader: R) -> Self {
        let read = LookAheadByteReader::new(reader);
        let triple_alloc = TripleAllocator::new();
        let graph_name_buf = String::new();
        Self {
            read,
            triple_alloc,
            graph_name_buf,
        }
    }
}
/// Step-wise quad parsing: each step handles one input line.
impl<R: BufRead> QuadsParser for NQuadsParser<R> {
    type Error = TurtleError;

    /// Parses the next line and, if it holds a quad, passes it to `on_quad`.
    ///
    /// Lines for which `parse_quad_line` reports no statement (it returns
    /// `Ok(None)`) are consumed without invoking the callback. The graph name
    /// given to the callback is optional; it is whatever `parse_quad_line`
    /// found after the triple.
    ///
    /// # Errors
    ///
    /// * An error returned by `on_quad` is passed through unchanged.
    /// * A parse error is converted with `E::from` after the reader has been
    ///   asked to move past the end of the current line, so the next call
    ///   starts on a new line.
    /// * If moving past the line end fails, that failure is returned instead
    ///   of the parse error.
    ///
    /// In every error case the triple allocator is emptied first, so no
    /// partially built triple survives into the next call.
    fn parse_step<E: From<TurtleError>>(
        &mut self,
        on_quad: &mut impl FnMut(Quad<'_>) -> Result<(), E>,
    ) -> Result<(), E> {
        match parse_quad_line(
            &mut self.read,
            &mut self.triple_alloc,
            &mut self.graph_name_buf,
        ) {
            Ok(Some(opt_graph_name)) => match on_quad(self.triple_alloc.top_quad(opt_graph_name)) {
                Ok(()) => {
                    self.triple_alloc.pop_top_triple();
                    // A finished line must leave nothing behind in the allocator.
                    debug_assert_eq!(self.triple_alloc.complete_len(), 0);
                    debug_assert_eq!(self.triple_alloc.incomplete_len(), 0);
                    Ok(())
                }
                Err(err) => {
                    self.triple_alloc.clear();
                    Err(err)
                }
            },
            Ok(None) => Ok(()),
            Err(error) => {
                // Reset the allocator before touching the reader again: the
                // `?` below may return early, and a half-built triple must
                // not leak into the following step.
                self.triple_alloc.clear();
                self.read.consume_line_end()?;
                Err(E::from(error))
            }
        }
    }

    /// Returns `true` once the reader has no current byte left.
    fn is_end(&self) -> bool {
        self.read.current().is_none()
    }
}
/// Parses a single line of triple input.
///
/// Returns `Ok(false)` when, after leading spaces and tabs, the line is
/// empty, starts a `#` comment, or the input is exhausted; the rest of the
/// line is skipped. Returns `Ok(true)` when a triple terminated by `.` was
/// read into `triple_alloc`; after the dot only blanks, a comment or the
/// line end may follow, anything else is an unexpected-character error.
fn parse_triple_line(
    read: &mut LookAheadByteReader<impl BufRead>,
    triple_alloc: &mut TripleAllocator,
) -> Result<bool, TurtleError> {
    skip_whitespace(read)?;
    match read.current() {
        None | Some(b'#') | Some(b'\r') | Some(b'\n') => {
            skip_until_eol(read)?;
            return Ok(false);
        }
        _ => {}
    }

    parse_triple(read, triple_alloc)?;

    // Mandatory statement terminator.
    read.check_is_current(b'.')?;
    read.consume()?;
    skip_whitespace(read)?;

    let line_is_done = matches!(
        read.current(),
        None | Some(b'#') | Some(b'\r') | Some(b'\n')
    );
    if !line_is_done {
        return read.unexpected_char_error();
    }
    skip_until_eol(read)?;
    Ok(true)
}
/// Reads subject, predicate and object, in that order, into a fresh triple
/// slot of `triple_alloc`.
///
/// Spaces and tabs are skipped after each of the three terms, so on success
/// the reader sits on the first byte that follows the object and its
/// trailing blanks. The predicate must be an IRI reference.
fn parse_triple(
    read: &mut LookAheadByteReader<impl BufRead>,
    triple_alloc: &mut TripleAllocator,
) -> Result<(), TurtleError> {
    triple_alloc.push_triple_start();

    parse_subject(read, triple_alloc)?;
    skip_whitespace(read)?;

    triple_alloc.try_push_predicate(|iri_buf| parse_iriref(read, iri_buf))?;
    skip_whitespace(read)?;

    parse_object(read, triple_alloc)?;
    skip_whitespace(read)
}
/// Parses a single line of quad input.
///
/// Returns `Ok(None)` when, after leading spaces and tabs, the line is
/// empty, starts a `#` comment, or the input is exhausted; the rest of the
/// line is skipped. Otherwise a triple is read into `triple_alloc` and the
/// result is `Ok(Some(graph))`, where `graph` is `Some` only if the triple
/// is followed by a term starting with `<` or `_`. The text of that graph
/// name is stored in `graph_name_buf`, which is cleared beforehand.
///
/// The statement must end with `.`; after the dot only blanks, a comment or
/// the line end may follow, anything else is an unexpected-character error.
fn parse_quad_line<'a>(
    read: &mut LookAheadByteReader<impl BufRead>,
    triple_alloc: &mut TripleAllocator,
    graph_name_buf: &'a mut String,
) -> Result<Option<Option<GraphName<'a>>>, TurtleError> {
    skip_whitespace(read)?;
    match read.current() {
        None | Some(b'#') | Some(b'\r') | Some(b'\n') => {
            skip_until_eol(read)?;
            return Ok(None);
        }
        _ => {}
    }

    parse_triple(read, triple_alloc)?;

    // Optional fourth term: the graph name.
    let has_graph_name = matches!(read.current(), Some(b'<') | Some(b'_'));
    let graph_name = if has_graph_name {
        graph_name_buf.clear();
        Some(parse_graph_name(read, graph_name_buf)?)
    } else {
        None
    };
    skip_whitespace(read)?;

    // Mandatory statement terminator.
    read.check_is_current(b'.')?;
    read.consume()?;
    skip_whitespace(read)?;

    let line_is_done = matches!(
        read.current(),
        None | Some(b'#') | Some(b'\r') | Some(b'\n')
    );
    if !line_is_done {
        return read.unexpected_char_error();
    }
    skip_until_eol(read)?;
    Ok(Some(graph_name))
}
/// Parses the subject of the triple under construction.
///
/// The first byte selects the form: `_` starts a blank node label, `<<`
/// starts a quoted triple, and a single `<` starts an IRI reference. Any
/// other byte yields an unexpected-character error.
fn parse_subject(
    read: &mut LookAheadByteReader<impl BufRead>,
    triple_alloc: &mut TripleAllocator,
) -> Result<(), TurtleError> {
    let lead = read.required_current()?;
    if lead == b'_' {
        return triple_alloc
            .try_push_subject(|buf| parse_blank_node_label(read, buf).map(Subject::from));
    }
    if lead != b'<' {
        return read.unexpected_char_error();
    }

    // One `<` opens an IRI, two open a quoted triple.
    if read.required_next()? != b'<' {
        return triple_alloc.try_push_subject(|buf| parse_iriref(read, buf).map(Subject::from));
    }
    parse_quoted_triple(read, triple_alloc)?;
    triple_alloc.push_subject_triple();
    Ok(())
}
/// Parses the object of the triple under construction.
///
/// The first byte selects the form: `_` starts a blank node label, `"`
/// starts a literal, `<<` starts a quoted triple, and a single `<` starts an
/// IRI reference. Any other byte yields an unexpected-character error.
fn parse_object(
    read: &mut LookAheadByteReader<impl BufRead>,
    triple_alloc: &mut TripleAllocator,
) -> Result<(), TurtleError> {
    let lead = read.required_current()?;
    if lead == b'_' {
        return triple_alloc
            .try_push_object(|buf, _| parse_blank_node_label(read, buf).map(Term::from));
    }
    if lead == b'"' {
        return triple_alloc
            .try_push_object(|value, extra| parse_literal(read, value, extra).map(Term::from));
    }
    if lead != b'<' {
        return read.unexpected_char_error();
    }

    // One `<` opens an IRI, two open a quoted triple.
    if read.required_next()? != b'<' {
        return triple_alloc.try_push_object(|buf, _| parse_iriref(read, buf).map(Term::from));
    }
    parse_quoted_triple(read, triple_alloc)?;
    triple_alloc.push_object_triple();
    Ok(())
}
/// Parses a quoted triple of the form `<< subject predicate object >>`.
///
/// The reader must be positioned on the opening `<<` (checked in debug
/// builds). On success the nested triple has been read into `triple_alloc`,
/// the closing `>>` has been consumed and trailing spaces and tabs skipped.
///
/// # Errors
///
/// Fails if `increment_stack_size` refuses the additional nesting level, if
/// the nested triple is malformed, or if the closing `>>` is missing.
///
/// NOTE(review): this assumes `increment_stack_size` / `decrement_stack_size`
/// maintain a nesting-depth counter on the reader that has to stay balanced —
/// confirm against `LookAheadByteReader`.
fn parse_quoted_triple(
    read: &mut LookAheadByteReader<impl BufRead>,
    triple_alloc: &mut TripleAllocator,
) -> Result<(), TurtleError> {
    debug_assert_eq!(read.current(), Some(b'<'));
    debug_assert_eq!(read.next()?, Some(b'<'));
    read.increment_stack_size()?;
    // Undo the increment whether or not the body parsed: callers resume at
    // the next line after an error, and an unbalanced counter would keep
    // growing with every malformed quoted triple.
    let delimited = parse_quoted_triple_delimited(read, triple_alloc);
    read.decrement_stack_size();
    delimited?;
    skip_whitespace(read)
}

/// Consumes `<<`, the nested triple and the closing `>>` of a quoted triple.
fn parse_quoted_triple_delimited(
    read: &mut LookAheadByteReader<impl BufRead>,
    triple_alloc: &mut TripleAllocator,
) -> Result<(), TurtleError> {
    read.consume_many(2)?;
    skip_whitespace(read)?;
    parse_triple(read, triple_alloc)?;
    read.check_is_current(b'>')?;
    read.consume()?;
    read.check_is_current(b'>')?;
    read.consume()?;
    Ok(())
}
/// Parses a graph name, which is either an IRI reference (starting with `<`)
/// or a blank node label (starting with `_`).
///
/// The text of the term is written to `buffer`, and the returned value
/// borrows from it. Any other leading byte is an unexpected-character error.
fn parse_graph_name<'a>(
    read: &mut LookAheadByteReader<impl BufRead>,
    buffer: &'a mut String,
) -> Result<GraphName<'a>, TurtleError> {
    let lead = read.required_current()?;
    if lead == b'<' {
        let iri = parse_iriref(read, buffer)?;
        return Ok(iri.into());
    }
    if lead == b'_' {
        let blank_node = parse_blank_node_label(read, buffer)?;
        return Ok(blank_node.into());
    }
    read.unexpected_char_error()
}
/// Parses a literal: a quoted string optionally followed by a language tag
/// (`@…`) or a datatype (`^^<iri>`).
///
/// The lexical value is written to `buffer`; the language tag or datatype
/// IRI, if present, is written to `annotation_buffer`. Spaces and tabs are
/// skipped between the string and its annotation, and between `^^` and the
/// datatype IRI. A single `^` not followed by another `^` is an error.
pub(crate) fn parse_literal<'a>(
    read: &mut LookAheadByteReader<impl BufRead>,
    buffer: &'a mut String,
    annotation_buffer: &'a mut String,
) -> Result<Literal<'a>, TurtleError> {
    parse_string_literal_quote(read, buffer)?;
    skip_whitespace(read)?;

    let marker = read.current();
    if marker == Some(b'@') {
        parse_langtag(read, annotation_buffer)?;
        return Ok(Literal::LanguageTaggedString {
            value: buffer,
            language: annotation_buffer,
        });
    }
    if marker == Some(b'^') {
        // Datatype marker is the two-byte sequence `^^`.
        read.consume()?;
        read.check_is_current(b'^')?;
        read.consume()?;
        skip_whitespace(read)?;
        let datatype = parse_iriref(read, annotation_buffer)?;
        return Ok(Literal::Typed {
            value: buffer,
            datatype,
        });
    }
    Ok(Literal::Simple { value: buffer })
}
/// Advances the reader past any run of spaces and tabs.
///
/// Line terminators are not skipped; the reader stops on the first byte that
/// is neither a space nor a tab, or at the end of input.
pub(crate) fn skip_whitespace(
    read: &mut LookAheadByteReader<impl BufRead>,
) -> Result<(), TurtleError> {
    while matches!(read.current(), Some(b' ') | Some(b'\t')) {
        read.consume()?;
    }
    Ok(())
}
/// Discards bytes up to and including the next `\n`.
///
/// Stops early, without error, if the input ends before a line feed is seen.
pub(crate) fn skip_until_eol(
    read: &mut LookAheadByteReader<impl BufRead>,
) -> Result<(), TurtleError> {
    while let Some(byte) = read.current() {
        read.consume()?;
        if byte == b'\n' {
            // The line feed itself has just been consumed.
            break;
        }
    }
    Ok(())
}
/// Parses an IRI reference into `buffer` via `parse_iriref_absolute` and
/// returns a [`NamedNode`] borrowing the buffer's contents.
pub(crate) fn parse_iriref<'a>(
    read: &mut LookAheadByteReader<impl BufRead>,
    buffer: &'a mut String,
) -> Result<NamedNode<'a>, TurtleError> {
    parse_iriref_absolute(read, buffer)?;
    let iri: &'a str = buffer;
    Ok(NamedNode { iri })
}
#[cfg(test)]
mod test {
    /// A line whose subject and object are quoted triples, followed by a
    /// graph name, must produce exactly one quad.
    #[test]
    fn nquads_star_valid_quad() -> Result<(), Box<dyn std::error::Error>> {
        use crate::{NQuadsParser, TurtleError};
        use rio_api::parser::QuadsParser;
        let file = b"<< <tag:a> <tag:b> <tag:c> >> <tag:d> << <tag:e> <tag:f> <tag:g> >> <tag:h>.";
        let mut count = 0;
        NQuadsParser::new(file.as_ref()).parse_all(&mut |_| -> Result<(), TurtleError> {
            count += 1;
            Ok(())
        })?;
        assert_eq!(1, count);
        Ok(())
    }

    /// A quoted triple in graph-name position must be rejected, and the
    /// callback must not receive a quad for the rejected line.
    #[test]
    fn nquads_star_invalid_graph_name() {
        use crate::{NQuadsParser, TurtleError};
        use rio_api::parser::QuadsParser;
        let file = b"<tag:s> <tag:p> <tag:o> << <tag:a> <tag:b> <tag:c> >> .";
        let mut count = 0;
        let res = NQuadsParser::new(file.as_ref()).parse_all(&mut |_| -> Result<(), TurtleError> {
            count += 1;
            Ok(())
        });
        assert!(res.is_err());
        // The only line in the input is invalid, so nothing may have been emitted.
        assert_eq!(0, count);
    }
}