use crate::error::*;
use crate::shared::*;
use crate::triple_allocator::TripleAllocator;
use crate::utils::*;
use oxiri::Iri;
use rio_api::model::*;
use rio_api::parser::{QuadsParser, TriplesParser};
use std::collections::HashMap;
use std::io::BufRead;
use std::str;
/// State of a Turtle parser that reads its input from `R`.
pub struct TurtleParser<R: BufRead> {
/// Byte reader with look-ahead over the input.
read: LookAheadByteReader<R>,
/// Current base IRI, if any; replaced whenever an `@base` / `BASE` directive is parsed.
base_iri: Option<Iri<String>>,
/// Prefix declarations recorded from `@prefix` / `PREFIX` directives (prefix name -> IRI text).
prefixes: HashMap<String, String>,
/// Source of identifiers for the blank nodes the parser creates itself.
bnode_id_generator: BlankNodeIdGenerator,
/// Allocator (push/pop API) holding the triple(s) currently under construction.
triple_alloc: TripleAllocator,
/// Scratch buffer handed to the IRI-parsing helpers.
temp_buf: String,
}
impl<R: BufRead> TurtleParser<R> {
    /// Creates a parser over `reader`.
    ///
    /// `base_iri`, when given, is the initial base IRI passed to the IRI-parsing helpers;
    /// it is replaced when a base directive is parsed.
    pub fn new(reader: R, base_iri: Option<Iri<String>>) -> Self {
        let mut parser = Self {
            read: LookAheadByteReader::new(reader),
            base_iri,
            prefixes: HashMap::new(),
            bnode_id_generator: BlankNodeIdGenerator::default(),
            triple_alloc: TripleAllocator::new(),
            temp_buf: String::new(),
        };
        // One triple is opened up front; the statement parsers assert that exactly one
        // incomplete triple remains after each statement.
        parser.triple_alloc.push_triple_start();
        parser
    }

    /// Returns the prefix declarations recorded so far (prefix name -> IRI text).
    pub fn prefixes(&self) -> &HashMap<String, String> {
        &self.prefixes
    }
}
// `TriplesParser` implementation: each step delegates to `parse_statement`.
impl<R: BufRead> TriplesParser for TurtleParser<R> {
type Error = TurtleError;
/// Parses the next statement, passing every triple it produces to `on_triple`.
///
/// Errors from the parser are converted into `E`; errors returned by `on_triple`
/// are propagated unchanged.
fn parse_step<E: From<TurtleError>>(
&mut self,
on_triple: &mut impl FnMut(Triple<'_>) -> Result<(), E>,
) -> Result<(), E> {
parse_statement(self, on_triple)
}
/// Returns `true` when the reader has no current byte left.
fn is_end(&self) -> bool {
self.read.current().is_none()
}
}
/// State of a TriG parser: a `TurtleParser` plus storage for the current graph name.
pub struct TriGParser<R: BufRead> {
/// Underlying Turtle parser; owns the reader, prefixes, base IRI and triple allocator.
inner: TurtleParser<R>,
/// Buffer receiving the text of the graph label (or subject) read at the start of a block.
graph_name_buf: String,
}
impl<R: BufRead> TriGParser<R> {
    /// Creates a TriG parser over `reader`, with `base_iri` as the optional initial base IRI.
    pub fn new(reader: R, base_iri: Option<Iri<String>>) -> Self {
        let inner = TurtleParser::new(reader, base_iri);
        Self {
            inner,
            graph_name_buf: String::new(),
        }
    }

    /// Returns the prefix declarations recorded so far by the wrapped Turtle parser.
    pub fn prefixes(&self) -> &HashMap<String, String> {
        self.inner.prefixes()
    }
}
// `QuadsParser` implementation: each step delegates to `parse_block_or_directive`.
impl<R: BufRead> QuadsParser for TriGParser<R> {
type Error = TurtleError;
/// Parses the next block or directive, passing every quad it produces to `on_quad`.
///
/// Errors from the parser are converted into `E`; errors returned by `on_quad`
/// are propagated unchanged.
fn parse_step<E: From<TurtleError>>(
&mut self,
on_quad: &mut impl FnMut(Quad<'_>) -> Result<(), E>,
) -> Result<(), E> {
parse_block_or_directive(self, on_quad)
}
/// Returns `true` when the wrapped reader has no current byte left.
fn is_end(&self) -> bool {
self.inner.read.current().is_none()
}
}
// Well-known IRIs emitted by the parser for syntactic shortcuts.

/// IRI of `rdf:type`; pushed as predicate when the `a` keyword is parsed.
pub(crate) const RDF_TYPE: &str = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type";
/// IRI of `rdf:nil`; terminates a collection and stands for the empty collection.
pub(crate) const RDF_NIL: &str = "http://www.w3.org/1999/02/22-rdf-syntax-ns#nil";
/// IRI of `rdf:first`; links a collection node to its element.
pub(crate) const RDF_FIRST: &str = "http://www.w3.org/1999/02/22-rdf-syntax-ns#first";
/// IRI of `rdf:rest`; links a collection node to the next node.
pub(crate) const RDF_REST: &str = "http://www.w3.org/1999/02/22-rdf-syntax-ns#rest";
/// Datatype IRI given to `true` / `false` literals.
pub(crate) const XSD_BOOLEAN: &str = "http://www.w3.org/2001/XMLSchema#boolean";
/// Datatype IRI given to numeric literals with a fractional part and no exponent.
pub(crate) const XSD_DECIMAL: &str = "http://www.w3.org/2001/XMLSchema#decimal";
/// Datatype IRI given to numeric literals with an exponent.
pub(crate) const XSD_DOUBLE: &str = "http://www.w3.org/2001/XMLSchema#double";
/// Datatype IRI given to numeric literals made of digits only (with optional sign).
pub(crate) const XSD_INTEGER: &str = "http://www.w3.org/2001/XMLSchema#integer";
/// Parses one top-level Turtle statement.
///
/// After skipping whitespace and comments, the statement is dispatched on its first bytes:
/// `@prefix`, `@base`, the case-insensitive `BASE` / `PREFIX` keywords, or otherwise a
/// triples statement terminated by `.`. Reaching the end of input is not an error.
/// Triples are reported through `on_triple`.
fn parse_statement<E: From<TurtleError>>(
    parser: &mut TurtleParser<impl BufRead>,
    on_triple: &mut impl FnMut(Triple<'_>) -> Result<(), E>,
) -> Result<(), E> {
    skip_whitespace(&mut parser.read)?;

    // Only whitespace/comments were left in the input.
    if parser.read.current().is_none() {
        return Ok(());
    }

    if parser.read.starts_with(b"@prefix") {
        return parse_prefix_id(
            &mut parser.read,
            &mut parser.prefixes,
            &parser.base_iri,
            &mut parser.temp_buf,
        )
        .map_err(E::from);
    }

    if parser.read.starts_with(b"@base") {
        let new_base = parse_base(&mut parser.read, &mut parser.temp_buf, &parser.base_iri)?;
        parser.base_iri = Some(new_base);
        return Ok(());
    }

    // `BASE` only counts as a keyword when followed by whitespace, `<` or end of input;
    // otherwise it may be the beginning of a prefixed name.
    if parser.read.starts_with_ignore_ascii_case(b"BASE")
        && parser
            .read
            .ahead(4)?
            .map_or(true, |c| c.is_ascii_whitespace() || c == b'<')
    {
        let new_base =
            parse_sparql_base(&mut parser.read, &mut parser.temp_buf, &parser.base_iri)?;
        parser.base_iri = Some(new_base);
        return Ok(());
    }

    // Same disambiguation for `PREFIX`, which must be followed by whitespace or end of input.
    if parser.read.starts_with_ignore_ascii_case(b"PREFIX")
        && parser
            .read
            .ahead(6)?
            .map_or(true, |c| c.is_ascii_whitespace())
    {
        return parse_sparql_prefix(
            &mut parser.read,
            &mut parser.prefixes,
            &parser.base_iri,
            &mut parser.temp_buf,
        )
        .map_err(E::from);
    }

    // Anything else is a triples statement, which must end with `.`.
    parse_triples(parser, on_triple)?;
    debug_assert_eq!(parser.triple_alloc.complete_len(), 0);
    debug_assert_eq!(parser.triple_alloc.incomplete_len(), 1);
    parser.read.check_is_current(b'.')?;
    parser.read.consume()?;
    Ok(())
}
/// Parses one top-level TriG construct.
///
/// After skipping whitespace and comments, dispatches on the first bytes: a directive
/// (`@prefix`, `@base`, `BASE`, `PREFIX`), a `GRAPH`-introduced graph block, an anonymous
/// `{ ... }` block, triples starting with `[ ... ]` / `( ... )`, or a term that is either a
/// graph label or a subject. Reaching the end of input is not an error. Quads are reported
/// through `on_quad`.
fn parse_block_or_directive<E: From<TurtleError>>(
    parser: &mut TriGParser<impl BufRead>,
    on_quad: &mut impl FnMut(Quad<'_>) -> Result<(), E>,
) -> Result<(), E> {
    skip_whitespace(&mut parser.inner.read)?;

    if parser.inner.read.current().is_none() {
        return Ok(());
    }

    if parser.inner.read.starts_with(b"@prefix") {
        parse_prefix_id(
            &mut parser.inner.read,
            &mut parser.inner.prefixes,
            &parser.inner.base_iri,
            &mut parser.inner.temp_buf,
        )?;
        return Ok(());
    }

    if parser.inner.read.starts_with(b"@base") {
        let new_base = parse_base(
            &mut parser.inner.read,
            &mut parser.inner.temp_buf,
            &parser.inner.base_iri,
        )?;
        parser.inner.base_iri = Some(new_base);
        return Ok(());
    }

    // Keywords are only recognised when the byte right after them cannot continue a name.
    if parser.inner.read.starts_with_ignore_ascii_case(b"BASE")
        && parser
            .inner
            .read
            .ahead(4)?
            .map_or(true, |c| c.is_ascii_whitespace() || c == b'<')
    {
        let new_base = parse_sparql_base(
            &mut parser.inner.read,
            &mut parser.inner.temp_buf,
            &parser.inner.base_iri,
        )?;
        parser.inner.base_iri = Some(new_base);
        return Ok(());
    }

    if parser.inner.read.starts_with_ignore_ascii_case(b"PREFIX")
        && parser
            .inner
            .read
            .ahead(6)?
            .map_or(true, |c| c.is_ascii_whitespace())
    {
        parse_sparql_prefix(
            &mut parser.inner.read,
            &mut parser.inner.prefixes,
            &parser.inner.base_iri,
            &mut parser.inner.temp_buf,
        )?;
        return Ok(());
    }

    if parser.inner.read.starts_with_ignore_ascii_case(b"GRAPH")
        && parser
            .inner
            .read
            .ahead(5)?
            .map_or(true, |c| c.is_ascii_whitespace() || c == b'<')
    {
        parser.inner.read.consume_many("GRAPH".len())?;
        skip_whitespace(&mut parser.inner.read)?;
        let graph_name = parse_label_or_subject(&mut parser.graph_name_buf, &mut parser.inner)?;
        skip_whitespace(&mut parser.inner.read)?;
        parse_wrapped_graph(
            &mut parser.inner,
            &mut on_triple_in_graph(on_quad, Some(graph_name)),
        )?;
        // The label text is no longer borrowed; reset the buffer for the next block.
        parser.graph_name_buf.clear();
        return Ok(());
    }

    // `[` followed only by whitespace and `]` is an anonymous blank node and is handled by
    // `parse_triples_or_graph`; any other `[` or a `(` starts triples without a graph name.
    let starts_unlabelled_triples = match parser.inner.read.current() {
        Some(b'{') => {
            return parse_wrapped_graph(&mut parser.inner, &mut on_triple_in_graph(on_quad, None));
        }
        Some(b'[') => !is_followed_by_space_and_closing_bracket(&mut parser.inner.read)?,
        Some(b'(') => true,
        _ => false,
    };

    if starts_unlabelled_triples {
        parse_triples2(&mut parser.inner, &mut on_triple_in_graph(on_quad, None))
    } else {
        parse_triples_or_graph(parser, on_quad)
    }
}
/// Parses a TriG construct that begins with a subject-like term.
///
/// The input is either a labelled graph (`label { ... }`), whose triples are reported with
/// that graph name, or a plain triples statement ending with `.`, reported with no graph
/// name. A statement starting with `<<` is always treated as triples whose subject is a
/// quoted triple.
fn parse_triples_or_graph<E: From<TurtleError>>(
parser: &mut TriGParser<impl BufRead>,
on_quad: &mut impl FnMut(Quad<'_>) -> Result<(), E>,
) -> Result<(), E> {
// Quoted-triple subject: no graph label is read in this case.
if parser.inner.read.starts_with(b"<<") {
parse_quoted_triple(&mut parser.inner)?;
parser.inner.triple_alloc.push_subject_triple();
skip_whitespace(&mut parser.inner.read)?;
parse_predicate_object_list(&mut parser.inner, &mut on_triple_in_graph(on_quad, None))?;
parser.inner.read.check_is_current(b'.')?;
parser.inner.read.consume()?;
parser.inner.triple_alloc.pop_subject();
return Ok(());
}
// Split the borrow so the label can live in `graph_name_buf` while `inner` keeps parsing.
let TriGParser {
inner,
graph_name_buf,
} = parser;
// Whether this term is a graph label or a subject is only known once we see what follows.
let graph_name = parse_label_or_subject(graph_name_buf, inner)?;
skip_whitespace(&mut inner.read)?;
if inner.read.current() == Some(b'{') {
// Labelled graph block.
parse_wrapped_graph(
&mut parser.inner,
&mut on_triple_in_graph(on_quad, Some(graph_name)),
)?;
} else {
// Not a graph: the text already read into `graph_name_buf` becomes the triple subject.
let blank = matches!(graph_name, GraphName::BlankNode(_));
inner.triple_alloc.try_push_subject(|b| {
b.push_str(graph_name_buf);
if blank {
Ok(Subject::BlankNode(BlankNode { id: b }))
} else {
Ok(Subject::NamedNode(NamedNode { iri: b }))
}
})?;
parse_predicate_object_list(&mut parser.inner, &mut on_triple_in_graph(on_quad, None))?;
parser.inner.read.check_is_current(b'.')?;
parser.inner.read.consume()?;
parser.inner.triple_alloc.pop_subject();
debug_assert_eq!(parser.inner.triple_alloc.complete_len(), 0);
debug_assert_eq!(parser.inner.triple_alloc.incomplete_len(), 1);
}
// Reset the label buffer for the next block.
parser.graph_name_buf.clear();
Ok(())
}
/// Parses a TriG triples statement whose subject is a blank node property list (`[ ... ]`)
/// or a collection (`( ... )`), including the terminating `.`.
///
/// After a property list, the predicate-object list is optional when `.` follows directly;
/// after a collection it is always parsed.
fn parse_triples2<E: From<TurtleError>>(
    parser: &mut TurtleParser<impl BufRead>,
    on_triple: &mut impl FnMut(Triple<'_>) -> Result<(), E>,
) -> Result<(), E> {
    let starts_property_list = parser.read.current() == Some(b'[')
        && !is_followed_by_space_and_closing_bracket(&mut parser.read)?;

    if starts_property_list {
        let subject_id = parse_blank_node_property_list(parser, on_triple)?;
        parser.triple_alloc.try_push_subject(|b| {
            b.push_str(subject_id.as_ref());
            Ok(Subject::from(BlankNode { id: b }))
        })?;
        skip_whitespace(&mut parser.read)?;
        if parser.read.current() != Some(b'.') {
            parse_predicate_object_list(parser, on_triple)?;
        }
    } else {
        let collection = parse_collection(parser, on_triple)?;
        parser
            .triple_alloc
            .try_push_subject(|b| allocate_collection(collection, b))?;
        skip_whitespace(&mut parser.read)?;
        parse_predicate_object_list(parser, on_triple)?;
    }

    parser.triple_alloc.pop_subject();
    debug_assert_eq!(parser.triple_alloc.complete_len(), 0);
    debug_assert_eq!(parser.triple_alloc.incomplete_len(), 1);

    parser.read.check_is_current(b'.')?;
    parser.read.consume()?;
    Ok(())
}
/// Parses a `{ ... }` graph block and reports each triple it contains through `on_triple`.
///
/// Triples statements inside the block are separated by `.`; the `.` before the closing
/// `}` is optional. Any other byte after a statement is an error.
fn parse_wrapped_graph<E: From<TurtleError>>(
    parser: &mut TurtleParser<impl BufRead>,
    on_triple: &mut impl FnMut(Triple<'_>) -> Result<(), E>,
) -> Result<(), E> {
    parser.read.check_is_current(b'{')?;
    parser.read.consume()?;
    skip_whitespace(&mut parser.read)?;

    // Loop until the reader sits on the closing brace, which is consumed afterwards.
    while parser.read.current() != Some(b'}') {
        parse_triples(parser, on_triple)?;
        debug_assert_eq!(parser.triple_alloc.complete_len(), 0);
        debug_assert_eq!(parser.triple_alloc.incomplete_len(), 1);

        match parser.read.current() {
            Some(b'.') => {
                parser.read.consume()?;
                skip_whitespace(&mut parser.read)?;
            }
            // Last statement without a trailing `.`: the loop condition ends the block.
            Some(b'}') => {}
            _ => parser.read.unexpected_char_error()?,
        }
    }

    parser.read.consume()?;
    Ok(())
}
/// Parses a term usable as a graph label or as a subject, writing its text into `buffer`.
///
/// A term starting with `_` or `[` is parsed as a blank node; anything else is parsed as
/// an IRI (IRI reference or prefixed name). The returned `GraphName` borrows `buffer`.
fn parse_label_or_subject<'a>(
    buffer: &'a mut String,
    parser: &mut TurtleParser<impl BufRead>,
) -> Result<GraphName<'a>, TurtleError> {
    let starts_blank_node = matches!(parser.read.current(), Some(b'_') | Some(b'['));

    let graph_name: GraphName<'a> = if starts_blank_node {
        parse_blank_node(&mut parser.read, buffer, &mut parser.bnode_id_generator)?.into()
    } else {
        parse_iri(
            &mut parser.read,
            buffer,
            &mut parser.temp_buf,
            &parser.base_iri,
            &parser.prefixes,
        )?
        .into()
    };
    Ok(graph_name)
}
/// Parses an `@prefix name: <iri> .` directive and records the mapping in `prefixes`.
///
/// The IRI is read with `parse_iriref_relative` against `base_iri`. An existing entry for
/// the same prefix is overwritten. The reader must be positioned on the `@prefix` keyword.
fn parse_prefix_id(
    read: &mut LookAheadByteReader<impl BufRead>,
    prefixes: &mut HashMap<String, String>,
    base_iri: &Option<Iri<String>>,
    temp_buffer: &mut String,
) -> Result<(), TurtleError> {
    read.consume_many("@prefix".len())?;
    skip_whitespace(read)?;

    let mut name = String::new();
    parse_pname_ns(read, &mut name)?;
    skip_whitespace(read)?;

    let mut iri = String::new();
    parse_iriref_relative(read, &mut iri, temp_buffer, base_iri)?;
    skip_whitespace(read)?;

    // The `@`-style directive requires a terminating dot.
    read.check_is_current(b'.')?;
    read.consume()?;

    prefixes.insert(name, iri);
    Ok(())
}
/// Parses an `@base <iri> .` directive and returns the new base IRI.
///
/// The reader must be positioned on the `@base` keyword. `buffer` is used as scratch
/// space by the IRI parser.
pub(crate) fn parse_base(
    read: &mut LookAheadByteReader<impl BufRead>,
    buffer: &mut String,
    base_iri: &Option<Iri<String>>,
) -> Result<Iri<String>, TurtleError> {
    read.consume_many("@base".len())?;
    skip_whitespace(read)?;

    let new_base = parse_base_iriref(read, buffer, base_iri)?;
    skip_whitespace(read)?;

    // The `@`-style directive requires a terminating dot.
    read.check_is_current(b'.')?;
    read.consume()?;

    Ok(new_base)
}
/// Parses a `BASE <iri>` directive (no terminating dot) and returns the new base IRI.
///
/// The reader must be positioned on the four-byte keyword; the caller is responsible for
/// having matched it. `buffer` is used as scratch space by the IRI parser.
pub(crate) fn parse_sparql_base(
    read: &mut LookAheadByteReader<impl BufRead>,
    buffer: &mut String,
    base_iri: &Option<Iri<String>>,
) -> Result<Iri<String>, TurtleError> {
    let keyword_len = "BASE".len();
    read.consume_many(keyword_len)?;
    skip_whitespace(read)?;
    parse_base_iriref(read, buffer, base_iri)
}
fn parse_base_iriref(
read: &mut LookAheadByteReader<impl BufRead>,
temp_buffer: &mut String,
base_iri: &Option<Iri<String>>,
) -> Result<Iri<String>, TurtleError> {
let mut buffer = String::default();
parse_iriref_relative(read, &mut buffer, temp_buffer, base_iri)?;
let result = Iri::parse(buffer.clone())
.map_err(|error| read.parse_error(TurtleErrorKind::InvalidIri { iri: buffer, error }))?;
temp_buffer.clear();
Ok(result)
}
/// Parses a `PREFIX name: <iri>` directive (no terminating dot) and records the mapping.
///
/// The reader must be positioned on the six-byte keyword; the caller is responsible for
/// having matched it. An existing entry for the same prefix is overwritten.
fn parse_sparql_prefix(
    read: &mut LookAheadByteReader<impl BufRead>,
    prefixes: &mut HashMap<String, String>,
    base_iri: &Option<Iri<String>>,
    temp_buffer: &mut String,
) -> Result<(), TurtleError> {
    read.consume_many("PREFIX".len())?;
    skip_whitespace(read)?;

    let mut name = String::new();
    parse_pname_ns(read, &mut name)?;
    skip_whitespace(read)?;

    let mut iri = String::new();
    parse_iriref_relative(read, &mut iri, temp_buffer, base_iri)?;
    skip_whitespace(read)?;

    prefixes.insert(name, iri);
    Ok(())
}
/// Parses a subject followed by its predicate-object list, without the terminating `.`.
///
/// When the subject is a blank node property list (`[ ... ]` with content), the
/// predicate-object list is optional if `.` or `}` follows directly. The subject is popped
/// from the allocator before returning.
fn parse_triples<E: From<TurtleError>>(
    parser: &mut TurtleParser<impl BufRead>,
    on_triple: &mut impl FnMut(Triple<'_>) -> Result<(), E>,
) -> Result<(), E> {
    let starts_property_list = parser.read.current() == Some(b'[')
        && !is_followed_by_space_and_closing_bracket(&mut parser.read)?;

    if starts_property_list {
        let subject_id = parse_blank_node_property_list(parser, on_triple)?;
        parser.triple_alloc.try_push_subject(|b| {
            b.push_str(subject_id.as_ref());
            Ok(Subject::from(BlankNode { id: b }))
        })?;
        skip_whitespace(&mut parser.read)?;

        let statement_ends = matches!(parser.read.current(), Some(b'.') | Some(b'}'));
        if !statement_ends {
            parse_predicate_object_list(parser, on_triple)?;
        }
    } else {
        parse_subject(parser, on_triple)?;
        skip_whitespace(&mut parser.read)?;
        parse_predicate_object_list(parser, on_triple)?;
    }

    parser.triple_alloc.pop_subject();
    Ok(())
}
/// Parses `verb objectList (';' (verb objectList)?)*` for the subject currently pushed on
/// the allocator.
///
/// Repeated `;` separators are tolerated. After the separators, the list ends when the
/// next byte is `.`, `]`, `}`, `|` or the end of input; otherwise another verb is parsed.
fn parse_predicate_object_list<E: From<TurtleError>>(
    parser: &mut TurtleParser<impl BufRead>,
    on_triple: &mut impl FnMut(Triple<'_>) -> Result<(), E>,
) -> Result<(), E> {
    loop {
        parse_verb(parser)?;
        skip_whitespace(&mut parser.read)?;
        parse_object_list(parser, on_triple)?;
        skip_whitespace(&mut parser.read)?;
        parser.triple_alloc.pop_predicate();

        // No separator: the list is complete.
        if parser.read.current() != Some(b';') {
            break;
        }

        // Swallow one or more `;`, each possibly followed by whitespace.
        while parser.read.current() == Some(b';') {
            parser.read.consume()?;
            skip_whitespace(&mut parser.read)?;
        }

        // A trailing `;` may be followed directly by the end of the enclosing construct.
        let list_ends = matches!(
            parser.read.current(),
            None | Some(b'.') | Some(b']') | Some(b'}') | Some(b'|')
        );
        if list_ends {
            break;
        }
    }
    Ok(())
}
/// Parses a comma-separated list of objects for the current subject and predicate.
///
/// Each object may be followed by an annotation block `{| ... |}`, whose predicate-object
/// list is parsed with the just-emitted triple as subject.
fn parse_object_list<E: From<TurtleError>>(
    parser: &mut TurtleParser<impl BufRead>,
    on_triple: &mut impl FnMut(Triple<'_>) -> Result<(), E>,
) -> Result<(), E> {
    loop {
        parse_object(parser, on_triple)?;
        skip_whitespace(&mut parser.read)?;

        let has_annotation = parser.read.current() == Some(b'{');
        if has_annotation {
            // Opening `{|`.
            parser.read.check_is_next(b'|')?;
            parser.read.consume_many(2)?;
            skip_whitespace(&mut parser.read)?;

            // The triple just built becomes the subject of the annotation statements.
            parser.triple_alloc.push_triple_start();
            parser.triple_alloc.push_subject_triple();
            parse_predicate_object_list(parser, on_triple)?;

            // Closing `|}`.
            parser.read.check_is_current(b'|')?;
            parser.read.check_is_next(b'}')?;
            parser.read.consume_many(2)?;
            skip_whitespace(&mut parser.read)?;
            parser.triple_alloc.pop_annotation_triple();
        }

        parser.triple_alloc.pop_object();

        match parser.read.current() {
            Some(b',') => {
                parser.read.consume()?;
                skip_whitespace(&mut parser.read)?;
            }
            _ => return Ok(()),
        }
    }
}
/// Parses a verb and pushes it as the current predicate.
///
/// A lone `a` is the shortcut for `rdf:type`. An `a` followed by a byte that can continue
/// a prefixed name (a name character, `.`, `:` or a non-ASCII byte) is parsed as a regular
/// predicate instead.
fn parse_verb(parser: &mut TurtleParser<impl BufRead>) -> Result<(), TurtleError> {
    if parser.read.current() != Some(b'a') {
        return parse_predicate(parser);
    }

    let continues_as_name = parser.read.next()?.map_or(false, |c| {
        is_possible_pn_chars_ascii(c) || c == b'.' || c == b':' || c > MAX_ASCII
    });
    if continues_as_name {
        return parse_predicate(parser);
    }

    parser.read.consume()?;
    parser
        .triple_alloc
        .try_push_predicate(|_| Ok(NamedNode { iri: RDF_TYPE }))
}
/// Parses a subject and pushes it on the triple allocator.
///
/// Dispatch on the first byte: `_` / `[` is a blank node, `(` a collection (whose triples
/// are reported through `on_triple`), `<<` a quoted triple, and anything else an IRI.
/// End of input is an error.
fn parse_subject<E: From<TurtleError>>(
    parser: &mut TurtleParser<impl BufRead>,
    on_triple: &mut impl FnMut(Triple<'_>) -> Result<(), E>,
) -> Result<(), E> {
    let first = parser.read.current();

    if first == Some(b'_') || first == Some(b'[') {
        // Split borrows so the closure can use the reader while the allocator is borrowed.
        let TurtleParser {
            read,
            bnode_id_generator,
            triple_alloc,
            ..
        } = parser;
        triple_alloc.try_push_subject(|b| {
            parse_blank_node(read, b, bnode_id_generator).map(Subject::from)
        })?;
        return Ok(());
    }

    if first == Some(b'(') {
        let collection = parse_collection(parser, on_triple)?;
        parser
            .triple_alloc
            .try_push_subject(|b| allocate_collection(collection, b))?;
        return Ok(());
    }

    let starts_quoted_triple =
        parser.read.required_current()? == b'<' && parser.read.required_next()? == b'<';
    if starts_quoted_triple {
        parse_quoted_triple(parser)?;
        parser.triple_alloc.push_subject_triple();
    } else {
        let TurtleParser {
            read,
            base_iri,
            prefixes,
            triple_alloc,
            temp_buf,
            ..
        } = parser;
        triple_alloc.try_push_subject(|b| {
            parse_iri(read, b, temp_buf, base_iri, prefixes).map(Subject::from)
        })?;
    }
    Ok(())
}
/// Parses an IRI (IRI reference or prefixed name) and pushes it as the current predicate.
fn parse_predicate(parser: &mut TurtleParser<impl BufRead>) -> Result<(), TurtleError> {
// Destructure so the closure can borrow the reader and lookup tables while
// `triple_alloc` is mutably borrowed by `try_push_predicate`.
let TurtleParser {
read,
base_iri,
prefixes,
triple_alloc,
temp_buf,
..
} = parser;
triple_alloc.try_push_predicate(|b| parse_iri(read, b, temp_buf, base_iri, prefixes))
}
/// Parses one object, pushes it on the triple allocator and reports the now-complete
/// top triple through `on_triple`.
///
/// The object kind is chosen from the first byte(s). The object is left on the allocator;
/// callers pop it. End of input is an error.
fn parse_object<E: From<TurtleError>>(
parser: &mut TurtleParser<impl BufRead>,
on_triple: &mut impl FnMut(Triple<'_>) -> Result<(), E>,
) -> Result<(), E> {
match parser.read.required_current()? {
// `<<` starts a quoted triple; a single `<` starts an IRI reference.
b'<' => {
if parser.read.required_next()? == b'<' {
parse_quoted_triple(parser)?;
parser.triple_alloc.push_object_triple();
} else {
let TurtleParser {
read,
base_iri,
triple_alloc,
temp_buf,
..
} = parser;
triple_alloc.try_push_object(|b, _| {
parse_iriref_relative(read, b, temp_buf, base_iri).map(Term::from)
})?;
}
}
// Collection: its own triples are reported while it is parsed.
b'(' => {
let collec = parse_collection(parser, on_triple)?;
parser
.triple_alloc
.try_push_object(|b, _| allocate_collection(collec, b).map(Term::from))?;
}
// `[` with content is a blank node property list; this arm must come before the
// generic `[` arm below, which only handles the empty `[ ]` form.
b'[' if !is_followed_by_space_and_closing_bracket(&mut parser.read)? => {
let id = parse_blank_node_property_list(parser, on_triple)?;
parser.triple_alloc.try_push_object(|b, _| {
b.push_str(id.as_ref());
Ok(Term::from(BlankNode { id: b }))
})?;
}
// Labelled (`_:x`) or anonymous (`[ ]`) blank node.
b'_' | b'[' => {
let TurtleParser {
read,
bnode_id_generator,
triple_alloc,
..
} = parser;
triple_alloc.try_push_object(|b, _| {
parse_blank_node(read, b, bnode_id_generator).map(Term::from)
})?;
}
// Quoted string literal, optionally with a language tag or datatype.
b'"' | b'\'' => {
let TurtleParser {
read,
base_iri,
prefixes,
triple_alloc,
temp_buf,
..
} = parser;
triple_alloc.try_push_object(|b1, b2| {
parse_rdf_literal(read, b1, b2, temp_buf, base_iri, prefixes).map(Term::from)
})?;
}
// Numeric literal.
b'+' | b'-' | b'.' | b'0'..=b'9' => {
let TurtleParser {
read, triple_alloc, ..
} = parser;
triple_alloc.try_push_object(|b, _| parse_numeric_literal(read, b).map(Term::from))?;
}
_ => {
let TurtleParser {
read, triple_alloc, ..
} = parser;
// `true` / `false` are boolean literals only when the following byte cannot
// continue a prefixed name; otherwise the token is parsed as a prefixed name.
if read.starts_with(b"true")
&& read.ahead(4)?.map_or(true, |c| {
c < MAX_ASCII && !is_possible_pn_chars_ascii(c) && c != b':'
})
|| read.starts_with(b"false")
&& read.ahead(5)?.map_or(true, |c| {
c < MAX_ASCII && !is_possible_pn_chars_ascii(c) && c != b':'
})
{
triple_alloc
.try_push_object(|b, _| parse_boolean_literal(read, b).map(Term::from))?;
} else {
// Destructure again because this branch also needs `prefixes`.
let TurtleParser {
read,
prefixes,
triple_alloc,
..
} = parser;
triple_alloc.try_push_object(|b, _| {
parse_prefixed_name(read, b, prefixes).map(Term::from)
})?;
}
}
};
// The triple on top of the allocator is complete at this point: report it.
on_triple(*parser.triple_alloc.top())
}
/// Parses a blank node property list `[ predicateObjectList ]` and returns the identifier
/// generated for the blank node.
///
/// The inner triples are reported through `on_triple` with the new blank node as subject.
/// The reader's stack-size counter is incremented on entry and decremented on success.
fn parse_blank_node_property_list<E: From<TurtleError>>(
    parser: &mut TurtleParser<impl BufRead>,
    on_triple: &mut impl FnMut(Triple<'_>) -> Result<(), E>,
) -> Result<BlankNodeId, E> {
    parser.read.increment_stack_size()?;
    parser.read.check_is_current(b'[')?;
    parser.read.consume()?;
    skip_whitespace(&mut parser.read)?;

    // Open a fresh triple whose subject is the newly generated blank node.
    let node_id = parser.bnode_id_generator.generate();
    parser.triple_alloc.push_triple_start();
    parser.triple_alloc.try_push_subject(|b| {
        b.push_str(node_id.as_ref());
        Ok(Subject::from(BlankNode { id: b }))
    })?;

    // At least one predicate-object list is required; keep parsing lists until `]`.
    parse_predicate_object_list(parser, on_triple)?;
    skip_whitespace(&mut parser.read)?;
    while parser.read.current() != Some(b']') {
        parse_predicate_object_list(parser, on_triple)?;
        skip_whitespace(&mut parser.read)?;
    }
    parser.read.consume()?;

    parser.triple_alloc.pop_subject();
    parser.triple_alloc.pop_top_empty_triple();
    parser.read.decrement_stack_size();
    Ok(node_id)
}
/// Parses a collection `( item* )` and reports its `rdf:first` / `rdf:rest` triples
/// through `on_triple`.
///
/// Returns the identifier of the first list node, or `None` for the empty collection `()`.
/// A fresh blank node is generated per item. Reaching the end of input before `)` is an
/// error. The reader's stack-size counter is incremented on entry and decremented when the
/// closing `)` is consumed.
fn parse_collection<E: From<TurtleError>>(
parser: &mut TurtleParser<impl BufRead>,
on_triple: &mut impl FnMut(Triple<'_>) -> Result<(), E>,
) -> Result<Option<BlankNodeId>, E> {
parser.read.increment_stack_size()?;
parser.read.check_is_current(b'(')?;
parser.read.consume()?;
let mut root: Option<BlankNodeId> = None;
loop {
skip_whitespace(&mut parser.read)?;
if parser.read.current().is_none() {
// Unterminated collection.
return Ok(parser.read.unexpected_char_error()?);
} else if parser.read.current() != Some(b')') {
// Another item: allocate the list node that will carry it.
let new = parser.bnode_id_generator.generate();
if root.is_none() {
// First item: remember the head and open the triple used for the list triples.
root = Some(new);
parser.triple_alloc.push_triple_start();
} else {
// Link the previous node (still pushed as subject) to the new one via `rdf:rest`,
// then drop the previous subject.
parser
.triple_alloc
.try_push_predicate(|_| Ok(NamedNode { iri: RDF_REST }))?;
parser.triple_alloc.try_push_object(|b, _| {
b.push_str(new.as_ref());
Ok(Term::from(BlankNode { id: b }))
})?;
on_triple(*parser.triple_alloc.top())?;
parser.triple_alloc.pop_object();
parser.triple_alloc.pop_predicate();
parser.triple_alloc.pop_subject();
}
// `new rdf:first <item>`; `parse_object` reports the triple itself.
parser.triple_alloc.try_push_subject(|b| {
b.push_str(new.as_ref());
Ok(Subject::from(BlankNode { id: b }))
})?;
parser
.triple_alloc
.try_push_predicate(|_| Ok(NamedNode { iri: RDF_FIRST }))?;
parse_object(parser, on_triple)?;
parser.triple_alloc.pop_object();
parser.triple_alloc.pop_predicate();
} else {
// Closing `)`.
parser.read.consume()?;
if root.is_some() {
// Terminate the non-empty list: `last rdf:rest rdf:nil`.
parser
.triple_alloc
.try_push_predicate(|_| Ok(NamedNode { iri: RDF_REST }))?;
parser
.triple_alloc
.try_push_object(|_, _| Ok(Term::from(NamedNode { iri: RDF_NIL })))?;
on_triple(*parser.triple_alloc.top())?;
parser.triple_alloc.pop_top_triple();
}
parser.read.decrement_stack_size();
return Ok(root);
}
}
}
/// Turns the result of `parse_collection` into a subject term.
///
/// A non-empty collection (`Some(id)`) becomes the blank node `id`, whose text is appended
/// to `buffer`; the empty collection (`None`) becomes `rdf:nil`. Never fails: the `Result`
/// wrapper only exists to match the closure signature expected by the allocator.
#[allow(clippy::unnecessary_wraps)]
fn allocate_collection(
    collection: Option<BlankNodeId>,
    buffer: &mut String,
) -> Result<Subject<'_>, TurtleError> {
    let subject: Subject<'_> = if let Some(head) = collection {
        buffer.push_str(head.as_ref());
        BlankNode { id: buffer }.into()
    } else {
        NamedNode { iri: RDF_NIL }.into()
    };
    Ok(subject)
}
/// Parses a numeric literal into `buffer` and returns it typed as `xsd:integer`,
/// `xsd:decimal` or `xsd:double`.
///
/// Accepted shape: optional sign, digits, optional `.` plus digits, optional exponent.
/// A `.` that is not followed by a digit or `e` / `E` is left unconsumed, so that it can
/// terminate the statement; the digits read so far then form an integer. Input without
/// any usable digit is an error.
pub(crate) fn parse_numeric_literal<'a>(
    read: &mut LookAheadByteReader<impl BufRead>,
    buffer: &'a mut String,
) -> Result<Literal<'a>, TurtleError> {
    // Optional sign.
    let first = read.required_current()?;
    if first == b'+' || first == b'-' {
        buffer.push(char::from(first));
        read.consume()?;
    }

    // Digits before the dot.
    let mut count_before: usize = 0;
    while let Some(digit @ b'0'..=b'9') = read.current() {
        buffer.push(char::from(digit));
        read.consume()?;
        count_before += 1;
    }

    // Optional fractional part; `None` means no dot was consumed.
    let count_after = if read.current() == Some(b'.') {
        let dot_belongs_to_number = matches!(
            read.next()?,
            Some(b'0'..=b'9') | Some(b'e') | Some(b'E')
        );
        if !dot_belongs_to_number {
            if count_before == 0 {
                return read.unexpected_char_error();
            }
            return Ok(Literal::Typed {
                value: buffer,
                datatype: NamedNode { iri: XSD_INTEGER },
            });
        }

        buffer.push('.');
        let mut fraction_digits = 0;
        read.consume()?;
        while let Some(digit @ b'0'..=b'9') = read.current() {
            buffer.push(char::from(digit));
            read.consume()?;
            fraction_digits += 1;
        }
        Some(fraction_digits)
    } else {
        None
    };

    let datatype = match read.current() {
        Some(b'e') | Some(b'E') => {
            // An exponent needs at least one digit in the mantissa.
            if count_before == 0 && count_after.unwrap_or(0) == 0 {
                return read.unexpected_char_error();
            }
            parse_exponent(read, buffer)?;
            XSD_DOUBLE
        }
        _ => match (count_after, count_before) {
            (None, before) if before > 0 => XSD_INTEGER,
            (Some(after), _) if after != 0 => XSD_DECIMAL,
            _ => return read.unexpected_char_error(),
        },
    };

    Ok(Literal::Typed {
        value: buffer,
        datatype: NamedNode { iri: datatype },
    })
}
/// Parses a quoted string literal with its optional language tag (`@lang`) or datatype
/// (`^^iri`).
///
/// The lexical value goes into `buffer`; the language tag or datatype IRI goes into
/// `annotation_buffer`. Whitespace between the string and the `@` / `^^` marker, and after
/// `^^`, is skipped.
#[allow(clippy::ptr_arg)]
pub(crate) fn parse_rdf_literal<'a>(
    read: &mut LookAheadByteReader<impl BufRead>,
    buffer: &'a mut String,
    annotation_buffer: &'a mut String,
    temp_buffer: &mut String,
    base_iri: &Option<Iri<String>>,
    prefixes: &HashMap<String, String>,
) -> Result<Literal<'a>, TurtleError> {
    parse_string(read, buffer)?;
    skip_whitespace(read)?;

    let marker = read.current();

    if marker == Some(b'@') {
        parse_langtag(read, annotation_buffer)?;
        return Ok(Literal::LanguageTaggedString {
            value: buffer,
            language: annotation_buffer,
        });
    }

    if marker == Some(b'^') {
        // The datatype marker is two carets.
        read.consume()?;
        read.check_is_current(b'^')?;
        read.consume()?;
        skip_whitespace(read)?;
        parse_iri(read, annotation_buffer, temp_buffer, base_iri, prefixes)?;
        return Ok(Literal::Typed {
            value: buffer,
            datatype: NamedNode {
                iri: annotation_buffer,
            },
        });
    }

    Ok(Literal::Simple { value: buffer })
}
/// Parses the keyword `true` or `false` into `buffer` and returns it as an `xsd:boolean`
/// literal. Any other input is an error.
///
/// Only the keyword itself is checked here; callers decide whether the byte after it
/// allows the token to be a boolean.
pub(crate) fn parse_boolean_literal<'a>(
    read: &mut LookAheadByteReader<impl BufRead>,
    buffer: &'a mut String,
) -> Result<Literal<'a>, TurtleError> {
    let keyword = if read.starts_with(b"true") {
        "true"
    } else if read.starts_with(b"false") {
        "false"
    } else {
        return read.unexpected_char_error();
    };

    read.consume_many(keyword.len())?;
    buffer.push_str(keyword);

    Ok(Literal::Typed {
        value: buffer,
        datatype: NamedNode { iri: XSD_BOOLEAN },
    })
}
/// Parses a quoted string into `buffer`, choosing among the four quoting forms:
/// `"..."`, `"""..."""`, `'...'` and `'''...'''`. Any other first byte is an error.
fn parse_string(
    read: &mut LookAheadByteReader<impl BufRead>,
    buffer: &mut String,
) -> Result<(), TurtleError> {
    match read.current() {
        // The triple-quoted forms are tested first since they share their first byte
        // with the short forms.
        Some(b'"') if read.starts_with(b"\"\"\"") => parse_string_literal_long_quote(read, buffer),
        Some(b'"') => parse_string_literal_quote(read, buffer),
        Some(b'\'') if read.starts_with(b"'''") => {
            parse_string_literal_long_single_quote(read, buffer)
        }
        Some(b'\'') => parse_string_literal_single_quote(read, buffer),
        _ => read.unexpected_char_error(),
    }
}
/// Parses an IRI written either as an IRI reference (`<...>`) or as a prefixed name.
///
/// The resulting IRI text is written into `buffer`, which the returned node borrows.
pub(crate) fn parse_iri<'a>(
    read: &mut LookAheadByteReader<impl BufRead>,
    buffer: &'a mut String,
    temp_buffer: &mut String,
    base_iri: &Option<Iri<String>>,
    prefixes: &HashMap<String, String>,
) -> Result<NamedNode<'a>, TurtleError> {
    match read.current() {
        Some(b'<') => parse_iriref_relative(read, buffer, temp_buffer, base_iri),
        _ => parse_prefixed_name(read, buffer, prefixes),
    }
}
/// Parses a prefixed name (`prefix:local`) and expands it into a full IRI in `buffer`.
///
/// The prefix part is read first and replaced in `buffer` by the IRI registered for it in
/// `prefixes`; an unregistered prefix yields an `UnknownPrefix` error. The local part is
/// then appended to `buffer`.
pub(crate) fn parse_prefixed_name<'a>(
read: &mut LookAheadByteReader<impl BufRead>,
buffer: &'a mut String,
prefixes: &HashMap<String, String>,
) -> Result<NamedNode<'a>, TurtleError> {
// Read `prefix:` into the buffer, then swap the prefix text for its namespace IRI.
parse_pname_ns(read, buffer)?;
if let Some(value) = prefixes.get(buffer.as_str()) {
buffer.clear();
buffer.push_str(value);
} else {
return Err(read.parse_error(TurtleErrorKind::UnknownPrefix(buffer.clone())));
}
// First character of the local part; it accepts `:` and digits in addition to the
// "pn_chars_u" set, and the local part may be empty.
if let Some(c) = read.current() {
match c {
b'\\' => parse_pn_local_esc(read, buffer)?,
b'%' => parse_percent(read, buffer)?,
b':' | b'0'..=b'9' => buffer.push(char::from(c)),
c if is_possible_pn_chars_u_ascii(c) => buffer.push(char::from(c)),
_ => {
let c = read_utf8_char(read)?;
if is_possible_pn_chars_u_unicode(c) {
buffer.push(c)
} else {
return Ok(NamedNode { iri: buffer });
}
}
}
} else {
return Ok(NamedNode { iri: buffer });
}
// Remaining characters. Each iteration starts by consuming the byte handled previously.
loop {
read.consume()?;
match read.current() {
// A dot is only part of the name when a valid name character follows the run of
// dots; otherwise it is left unconsumed for the caller.
Some(b'.') => {
if has_future_char_valid_pname_local(read)? {
buffer.push('.')
} else {
break;
}
}
Some(b'\\') => parse_pn_local_esc(read, buffer)?,
Some(b'%') => parse_percent(read, buffer)?,
Some(b':') => buffer.push(':'),
Some(c) if is_possible_pn_chars_ascii(c) => buffer.push(char::from(c)),
_ => {
let c = read_utf8_char(read)?;
if is_possible_pn_chars_unicode(c) {
buffer.push(c)
} else {
break;
}
}
}
}
Ok(NamedNode { iri: buffer })
}
/// Looks ahead, starting one byte after the current one, to decide whether a `.` inside a
/// local name is followed by more name characters.
///
/// Further dots are skipped. Returns `true` when the first other byte is `:`, `%`, `\`,
/// a non-ASCII byte or an ASCII name character; `false` for any other byte or at the end
/// of input. Nothing is consumed.
fn has_future_char_valid_pname_local(
    read: &mut LookAheadByteReader<impl BufRead>,
) -> Result<bool, TurtleError> {
    let mut offset = 1;
    while let Some(c) = read.ahead(offset)? {
        let continues_name = matches!(c, b':' | b'%' | b'\\')
            || c > MAX_ASCII
            || is_possible_pn_chars_ascii(c);
        if continues_name {
            return Ok(true);
        }
        if c != b'.' {
            return Ok(false);
        }
        offset += 1;
    }
    Ok(false)
}
/// Parses a blank node, either labelled (`_:label`) or anonymous (`[ ]`), into `buffer`.
///
/// A label read from the input is passed to `bnode_id_generator.disambiguate`; an
/// anonymous node receives a generated identifier. Any other first byte is an error.
pub(crate) fn parse_blank_node<'a>(
    read: &mut LookAheadByteReader<impl BufRead>,
    buffer: &'a mut String,
    bnode_id_generator: &mut BlankNodeIdGenerator,
) -> Result<BlankNode<'a>, TurtleError> {
    let first = read.current();

    if first == Some(b'_') {
        parse_blank_node_label(read, buffer)?;
        bnode_id_generator.disambiguate(buffer);
    } else if first == Some(b'[') {
        parse_anon(read, buffer, bnode_id_generator)?;
    } else {
        return read.unexpected_char_error();
    }

    Ok(BlankNode { id: buffer })
}
/// Parses a namespace prefix followed by `:`; the prefix text (without the colon) is
/// appended to `buffer` and the colon is consumed. A missing colon is an error.
pub(crate) fn parse_pname_ns(
    read: &mut LookAheadByteReader<impl BufRead>,
    buffer: &mut String,
) -> Result<(), TurtleError> {
    parse_pn_prefix(read, buffer)?;

    if read.current() != Some(b':') {
        return read.unexpected_char_error();
    }
    read.consume()?;
    Ok(())
}
/// Parses the exponent of a numeric literal: `e` or `E`, an optional sign, and at least
/// one digit. Everything read is appended to `buffer`; the reader stops on the first byte
/// that is not a digit.
fn parse_exponent(
    read: &mut LookAheadByteReader<impl BufRead>,
    buffer: &mut String,
) -> Result<(), TurtleError> {
    // Exponent marker.
    let marker = read.required_current()?;
    if marker != b'e' && marker != b'E' {
        return read.unexpected_char_error();
    }
    buffer.push(char::from(marker));
    read.consume()?;

    // Optional sign.
    if let Some(sign) = read.current() {
        if sign == b'+' || sign == b'-' {
            buffer.push(char::from(sign));
            read.consume()?;
        }
    }

    // At least one digit is mandatory.
    let first_digit = read.required_current()?;
    if !first_digit.is_ascii_digit() {
        return read.unexpected_char_error();
    }
    buffer.push(char::from(first_digit));

    // Remaining digits; each iteration first consumes the digit pushed previously.
    loop {
        read.consume()?;
        match read.current() {
            Some(digit) if digit.is_ascii_digit() => buffer.push(char::from(digit)),
            _ => return Ok(()),
        }
    }
}
/// Parses a `'...'` string into `buffer` by calling the shared quoted-string routine with
/// `'` as the delimiter.
fn parse_string_literal_single_quote(
read: &mut LookAheadByteReader<impl BufRead>,
buffer: &mut String,
) -> Result<(), TurtleError> {
parse_string_literal_quote_inner(read, buffer, b'\'')
}
/// Parses a `'''...'''` string into `buffer` by calling the long-string routine with `'`
/// as the delimiter.
fn parse_string_literal_long_single_quote(
read: &mut LookAheadByteReader<impl BufRead>,
buffer: &mut String,
) -> Result<(), TurtleError> {
parse_string_literal_long_quote_inner(read, buffer, b'\'')
}
/// Parses a `"""..."""` string into `buffer` by calling the long-string routine with `"`
/// as the delimiter.
fn parse_string_literal_long_quote(
read: &mut LookAheadByteReader<impl BufRead>,
buffer: &mut String,
) -> Result<(), TurtleError> {
parse_string_literal_long_quote_inner(read, buffer, b'"')
}
/// Parses the content of a triple-quoted string delimited by three `quote` bytes.
///
/// The reader must be positioned on the first opening quote; the caller is expected to
/// have checked that three quotes are present. Content is appended to `buffer` until the
/// closing triple quote, which is consumed. Escapes are decoded by `parse_echar_or_uchar`.
/// Reaching the end of input before the closing delimiter is an error.
fn parse_string_literal_long_quote_inner(
    read: &mut LookAheadByteReader<impl BufRead>,
    buffer: &mut String,
    quote: u8,
) -> Result<(), TurtleError> {
    let delimiter = [quote; 3];

    // Skip two of the opening quotes; the loop's leading `consume` skips the third.
    read.consume_many(2)?;
    loop {
        read.consume()?;
        let c = read.required_current()?;

        // One or two quotes in a row are ordinary content; only three close the string.
        if c == quote && read.starts_with(&delimiter) {
            read.consume_many(3)?;
            return Ok(());
        }

        if c == b'\\' {
            parse_echar_or_uchar(read, buffer)?;
            continue;
        }

        let decoded = if c <= 0x7F {
            char::from(c)
        } else {
            read_utf8_char(read)?
        };
        buffer.push(decoded);
    }
}
/// Parses an anonymous blank node `[ ]` (brackets separated only by whitespace or
/// comments, as skipped by `skip_whitespace`) and appends a freshly generated identifier
/// to `buffer`.
fn parse_anon(
read: &mut LookAheadByteReader<impl BufRead>,
buffer: &mut String,
bnode_id_generator: &mut BlankNodeIdGenerator,
) -> Result<(), TurtleError> {
read.check_is_current(b'[')?;
read.consume()?;
skip_whitespace(read)?;
read.check_is_current(b']')?;
read.consume()?;
buffer.push_str(bnode_id_generator.generate().as_ref());
Ok(())
}
/// Reads a namespace prefix (the part before `:`) and appends it to `buffer`.
///
/// Returns `Ok(())` without appending anything when the first character cannot start a
/// prefix, which leaves an empty prefix for the caller to handle. Parsing stops at the
/// first character that cannot be part of the prefix.
/// NOTE(review): relies on `read_utf8_char` leaving the reader positioned so that the
/// caller can still inspect the terminating byte (e.g. `:`) — confirm against its definition.
fn parse_pn_prefix(
read: &mut LookAheadByteReader<impl BufRead>,
buffer: &mut String,
) -> Result<(), TurtleError> {
// First character: must belong to the "pn_chars_base" set (ASCII fast path first).
match read.current() {
Some(c) if c <= MAX_ASCII && is_possible_pn_chars_base_ascii(c) => {
buffer.push(char::from(c))
}
_ => {
let c = read_utf8_char(read)?;
if is_possible_pn_chars_base_unicode(c) {
buffer.push(c)
} else {
return Ok(()); }
}
}
// Following characters. Each iteration starts by consuming the byte handled previously.
loop {
read.consume()?;
match read.current() {
// A dot belongs to the prefix only when the byte right after it is a name
// character or a non-ASCII byte; otherwise it is left unconsumed.
Some(b'.') => match read.next()? {
Some(c) if is_possible_pn_chars_ascii(c) || c > MAX_ASCII => buffer.push('.'),
_ => {
return Ok(());
}
},
Some(c) if c <= MAX_ASCII && is_possible_pn_chars_ascii(c) => {
buffer.push(char::from(c))
}
_ => {
let c = read_utf8_char(read)?;
if is_possible_pn_chars_unicode(c) {
buffer.push(c)
} else {
return Ok(());
}
}
}
}
}
/// Parses a percent-encoded triplet `%HH` and appends it verbatim (not decoded) to
/// `buffer`.
///
/// On return the reader is still positioned on the second hex digit; the caller consumes it.
fn parse_percent(
read: &mut LookAheadByteReader<impl BufRead>,
buffer: &mut String,
) -> Result<(), TurtleError> {
read.check_is_current(b'%')?;
buffer.push('%');
read.consume()?;
parse_hex(read, buffer)?;
read.consume()?;
parse_hex(read, buffer)?;
Ok(())
}
/// Checks that the current byte is an ASCII hexadecimal digit (`0-9`, `a-f`, `A-F`) and
/// appends it to `buffer`. The byte is not consumed. End of input or any other byte is an
/// error.
fn parse_hex(
    read: &mut LookAheadByteReader<impl BufRead>,
    buffer: &mut String,
) -> Result<(), TurtleError> {
    let c = read.required_current()?;
    if !c.is_ascii_hexdigit() {
        return read.unexpected_char_error();
    }
    buffer.push(char::from(c));
    Ok(())
}
/// Parses a backslash escape inside a local name and appends the escaped character
/// (without the backslash) to `buffer`.
///
/// The backslash is consumed; the escaped byte is left as the current byte for the caller
/// to consume. Only the punctuation characters listed below may be escaped.
fn parse_pn_local_esc(
    read: &mut LookAheadByteReader<impl BufRead>,
    buffer: &mut String,
) -> Result<(), TurtleError> {
    // Characters that may follow the backslash.
    const ESCAPABLE: &[u8] = b"_~.-!$&'()*+,;=/?#@%";

    read.check_is_current(b'\\')?;
    read.consume()?;

    let escaped = read.required_current()?;
    if !ESCAPABLE.contains(&escaped) {
        return read.unexpected_char_error();
    }
    buffer.push(char::from(escaped));
    Ok(())
}
/// Advances the reader past whitespace (space, tab, CR, LF) and `#` comments.
///
/// A comment runs up to, but not including, the next CR or LF (or the end of input); the
/// line break itself is then skipped as ordinary whitespace. Returns with the reader on
/// the first significant byte or at the end of input.
pub(crate) fn skip_whitespace(
    read: &mut LookAheadByteReader<impl BufRead>,
) -> Result<(), TurtleError> {
    loop {
        match read.current() {
            Some(b'#') => {
                // Drop the rest of the line.
                while !matches!(read.current(), None | Some(b'\r') | Some(b'\n')) {
                    read.consume()?;
                }
            }
            Some(c) if matches!(c, b' ' | b'\t' | b'\n' | b'\r') => read.consume()?,
            _ => return Ok(()),
        }
    }
}
/// Looks ahead, starting one byte after the current one, and reports whether only
/// whitespace (space, tab, CR, LF) separates the current byte from a `]`.
///
/// Nothing is consumed. Comments are not skipped: any other byte, or the end of input,
/// yields `false`.
pub(crate) fn is_followed_by_space_and_closing_bracket(
    read: &mut LookAheadByteReader<impl BufRead>,
) -> Result<bool, TurtleError> {
    let mut offset = 1;
    loop {
        match read.ahead(offset)? {
            Some(b']') => return Ok(true),
            Some(b' ') | Some(b'\t') | Some(b'\n') | Some(b'\r') => offset += 1,
            _ => return Ok(false),
        }
    }
}
/// Adapts a quad callback into a triple callback.
///
/// The returned closure forwards every triple it receives to `on_quad` as a `Quad` with
/// the same subject, predicate and object and with `graph_name` as its graph name (`None`
/// meaning that no graph name is attached).
fn on_triple_in_graph<'a, E>(
on_quad: &'a mut impl FnMut(Quad<'_>) -> Result<(), E>,
graph_name: Option<GraphName<'a>>,
) -> impl FnMut(Triple<'_>) -> Result<(), E> + 'a {
// `move` so the closure owns the reborrowed callback and the graph name.
move |t: Triple<'_>| {
on_quad(Quad {
subject: t.subject,
predicate: t.predicate,
object: t.object,
graph_name,
})
}
}
/// Parses a quoted triple `<< subject verb object >>` into a new triple on the allocator.
///
/// The reader must be positioned on the opening `<<`, which is skipped without being
/// re-checked. The triple is left on the allocator; the caller decides whether it is used
/// as a subject or as an object. The reader's stack-size counter is incremented on entry
/// and decremented on success.
pub(crate) fn parse_quoted_triple(
parser: &mut TurtleParser<impl BufRead>,
) -> Result<(), TurtleError> {
parser.read.increment_stack_size()?;
// Skip the opening `<<`.
parser.read.consume_many(2)?;
skip_whitespace(&mut parser.read)?;
parser.triple_alloc.push_triple_start();
parse_emb_subject(parser)?;
skip_whitespace(&mut parser.read)?;
parse_verb(parser)?;
skip_whitespace(&mut parser.read)?;
parse_emb_object(parser)?;
skip_whitespace(&mut parser.read)?;
// Closing `>>`.
parser.read.check_is_current(b'>')?;
parser.read.check_is_next(b'>')?;
parser.read.consume_many(2)?;
parser.read.decrement_stack_size();
Ok(())
}
/// Parses the subject of a quoted triple and pushes it on the triple allocator.
///
/// Accepted forms: a nested quoted triple (`<<`), an IRI reference (`<`), a blank node
/// (`_` or `[`), or otherwise a prefixed name. Collections and blank node property lists
/// are not handled here.
pub(crate) fn parse_emb_subject(
parser: &mut TurtleParser<impl BufRead>,
) -> Result<(), TurtleError> {
match parser.read.current() {
Some(b'<') => {
// `<<` nests another quoted triple; a single `<` starts an IRI reference.
if parser.read.required_next()? == b'<' {
parse_quoted_triple(parser)?;
parser.triple_alloc.push_subject_triple();
Ok(())
} else {
let TurtleParser {
read,
base_iri,
triple_alloc,
temp_buf,
..
} = parser;
triple_alloc.try_push_subject(|b| {
parse_iriref_relative(read, b, temp_buf, base_iri).map(Subject::from)
})
}
}
// Labelled or anonymous blank node.
Some(b'_') | Some(b'[') => {
let TurtleParser {
read,
bnode_id_generator,
triple_alloc,
..
} = parser;
triple_alloc.try_push_subject(|b| {
parse_blank_node(read, b, bnode_id_generator).map(Subject::from)
})
}
// Anything else is parsed as a prefixed name.
_ => {
let TurtleParser {
read,
prefixes,
triple_alloc,
..
} = parser;
triple_alloc
.try_push_subject(|b| parse_prefixed_name(read, b, prefixes).map(Subject::from))
}
}
}
/// Parses the object of a quoted triple and pushes it on the triple allocator.
///
/// Accepted forms: a nested quoted triple, an IRI reference, a blank node, a string,
/// numeric or boolean literal, or otherwise a prefixed name. Unlike `parse_object`, no
/// callback is invoked and collections / blank node property lists are not handled.
/// End of input is an error.
pub(crate) fn parse_emb_object(parser: &mut TurtleParser<impl BufRead>) -> Result<(), TurtleError> {
match parser.read.required_current()? {
b'<' => {
// `<<` nests another quoted triple; a single `<` starts an IRI reference.
if parser.read.required_next()? == b'<' {
parse_quoted_triple(parser)?;
parser.triple_alloc.push_object_triple();
Ok(())
} else {
let TurtleParser {
read,
base_iri,
triple_alloc,
temp_buf,
..
} = parser;
triple_alloc.try_push_object(|b, _| {
parse_iriref_relative(read, b, temp_buf, base_iri).map(Term::from)
})
}
}
// Labelled or anonymous blank node.
b'_' | b'[' => {
let TurtleParser {
read,
bnode_id_generator,
triple_alloc,
..
} = parser;
triple_alloc.try_push_object(|b, _| {
parse_blank_node(read, b, bnode_id_generator).map(Term::from)
})
}
// Quoted string literal, optionally with a language tag or datatype.
b'"' | b'\'' => {
let TurtleParser {
read,
base_iri,
prefixes,
triple_alloc,
temp_buf,
..
} = parser;
triple_alloc.try_push_object(|b1, b2| {
parse_rdf_literal(read, b1, b2, temp_buf, base_iri, prefixes).map(Term::from)
})
}
// Numeric literal.
b'+' | b'-' | b'.' | b'0'..=b'9' => {
let TurtleParser {
read, triple_alloc, ..
} = parser;
triple_alloc.try_push_object(|b, _| parse_numeric_literal(read, b).map(Term::from))
}
_ => {
let TurtleParser {
read, triple_alloc, ..
} = parser;
// `true` / `false` are boolean literals only when the following byte cannot
// continue a prefixed name; otherwise the token is parsed as a prefixed name.
if read.starts_with(b"true")
&& read.ahead(4)?.map_or(true, |c| {
c < MAX_ASCII && !is_possible_pn_chars_ascii(c) && c != b':'
})
|| read.starts_with(b"false")
&& read.ahead(5)?.map_or(true, |c| {
c < MAX_ASCII && !is_possible_pn_chars_ascii(c) && c != b':'
})
{
triple_alloc.try_push_object(|b, _| parse_boolean_literal(read, b).map(Term::from))
} else {
// Destructure again because this branch also needs `prefixes`.
let TurtleParser {
read,
prefixes,
triple_alloc,
..
} = parser;
triple_alloc
.try_push_object(|b, _| parse_prefixed_name(read, b, prefixes).map(Term::from))
}
}
}
}
#[cfg(test)]
mod test {
use super::*;
/// Regression test: a blank node label written in the document must not end up equal to
/// an identifier the parser generates for an anonymous node.
#[test]
fn issue_46() -> Result<(), TurtleError> {
// Use the first identifier a fresh generator produces as an explicit label in the input.
// NOTE(review): presumably the parser's own generator yields the same first identifier
// for `[ :name "bob" ]`, which is what creates the potential clash — confirm.
let bnid = crate::utils::BlankNodeIdGenerator::default().generate();
let ttl = format!(
r#"PREFIX : <tag:>
:alice :knows [ :name "bob" ].
_:{} :name "charlie".
"#,
bnid.as_ref()
);
// Collect the identifiers of all blank-node subjects reported by the parser.
let mut blank_subjects = vec![];
TurtleParser::new(std::io::Cursor::new(&ttl), None).parse_all(&mut |t| -> Result<
(),
TurtleError,
> {
if let Subject::BlankNode(b) = t.subject {
blank_subjects.push(b.id.to_string());
}
Ok(())
})?;
// Two triples have a blank subject (the anonymous node and the labelled one),
// and the two identifiers must differ.
assert_eq!(blank_subjects.len(), 2);
assert_ne!(&blank_subjects[0], &blank_subjects[1]);
Ok(())
}
}