use super::tokenizer::TurtleTokenizer;
use super::types::{TokenKind, TurtleParsingContext, TurtleStatement};
use crate::error::{TextPosition, TurtleParseError, TurtleResult, TurtleSyntaxError};
use crate::toolkit::Parser;
#[cfg(feature = "rdf-12")]
use oxirs_core::model::literal::BaseDirection;
use oxirs_core::model::{BlankNode, Literal, Object, Predicate, QuotedTriple, Subject, Triple};
use std::collections::HashMap;
use std::io::{BufRead, Read};
/// Recursive-descent parser for the Turtle RDF serialization format.
#[derive(Debug, Clone)]
pub struct TurtleParser {
    /// When true, syntax errors are collected and parsing resumes at the
    /// next statement instead of aborting on the first error.
    pub lenient: bool,
    /// Base IRI used to resolve relative IRI references, if any.
    pub base_iri: Option<String>,
    /// Prefix label -> namespace IRI mappings available before any
    /// `@prefix` declarations in the document itself.
    pub prefixes: HashMap<String, String>,
}
impl Default for TurtleParser {
fn default() -> Self {
Self::new()
}
}
impl TurtleParser {
pub fn new() -> Self {
let mut prefixes = HashMap::new();
prefixes.insert(
"rdf".to_string(),
"http://www.w3.org/1999/02/22-rdf-syntax-ns#".to_string(),
);
prefixes.insert(
"rdfs".to_string(),
"http://www.w3.org/2000/01/rdf-schema#".to_string(),
);
prefixes.insert(
"xsd".to_string(),
"http://www.w3.org/2001/XMLSchema#".to_string(),
);
prefixes.insert(
"owl".to_string(),
"http://www.w3.org/2002/07/owl#".to_string(),
);
Self {
lenient: false,
base_iri: None,
prefixes,
}
}
pub fn new_lenient() -> Self {
let mut parser = Self::new();
parser.lenient = true;
parser
}
pub fn with_base_iri(mut self, base_iri: String) -> Self {
self.base_iri = Some(base_iri);
self
}
/// Builder-style registration of an additional namespace prefix.
pub fn with_prefix(mut self, prefix: String, iri: String) -> Self {
    self.prefixes.extend(std::iter::once((prefix, iri)));
    self
}
/// Parses a complete Turtle document from an in-memory string.
///
/// In strict mode the first syntax error aborts parsing; in lenient
/// mode errors are accumulated (skipping to the next statement after
/// each) and reported together at the end.
pub fn parse_document(&self, content: &str) -> TurtleResult<Vec<Triple>> {
    let mut context = TurtleParsingContext::new();
    context.prefixes = self.prefixes.clone();
    context.base_iri = self.base_iri.clone();
    let mut tokenizer = TurtleTokenizer::new(content);
    let mut triples = Vec::new();
    let mut errors = Vec::new();
    loop {
        match self.parse_statement(&mut tokenizer, &mut context) {
            // End of input.
            Ok(None) => break,
            Ok(Some(TurtleStatement::Triple(triple))) => triples.push(triple),
            Ok(Some(TurtleStatement::Triples(mut batch))) => triples.append(&mut batch),
            // Directives mutate the parsing context for later statements.
            Ok(Some(TurtleStatement::PrefixDecl(prefix, iri))) => {
                context.prefixes.insert(prefix, iri);
            }
            Ok(Some(TurtleStatement::BaseDecl(iri))) => context.base_iri = Some(iri),
            Err(e) if self.lenient => {
                errors.push(e);
                self.skip_to_next_statement(&mut tokenizer);
            }
            Err(e) => return Err(e),
        }
    }
    if errors.is_empty() {
        Ok(triples)
    } else {
        Err(TurtleParseError::multiple(errors))
    }
}
/// Error-recovery helper for lenient mode: advances past the current
/// (malformed) statement, stopping after the next '.' or newline.
fn skip_to_next_statement(&self, tokenizer: &mut TurtleTokenizer) {
    while !tokenizer.is_at_end() {
        let Some(ch) = tokenizer.current_char() else {
            break;
        };
        tokenizer.advance();
        if ch == '.' {
            break;
        }
        if ch == '\n' {
            // Resynchronize at the start of the next meaningful token.
            tokenizer.skip_whitespace_and_comments();
            break;
        }
    }
}
/// Parses one statement: a `@prefix` directive, a `@base` directive, or
/// a triple statement. Returns `Ok(None)` at end of input.
fn parse_statement(
    &self,
    tokenizer: &mut TurtleTokenizer,
    context: &mut TurtleParsingContext,
) -> TurtleResult<Option<TurtleStatement>> {
    tokenizer.skip_whitespace_and_comments();
    if tokenizer.is_at_end() {
        return Ok(None);
    }
    let (token, _) = tokenizer.peek_token()?;
    match &token.kind {
        TokenKind::PrefixKeyword => {
            let _ = tokenizer.consume_token();
            let prefix = self.parse_prefix_name(tokenizer)?;
            // parse_prefix_name leaves a standalone ':' (empty prefix)
            // unconsumed; eat it here before reading the namespace IRI.
            let (next_token, _) = tokenizer.peek_token()?;
            if matches!(next_token.kind, TokenKind::Colon) {
                let _ = tokenizer.consume_token();
            }
            let iri = self.parse_iri_ref(tokenizer, context)?;
            self.expect_token(tokenizer, TokenKind::Dot)?;
            Ok(Some(TurtleStatement::PrefixDecl(prefix, iri)))
        }
        TokenKind::BaseKeyword => {
            let _ = tokenizer.consume_token();
            let iri = self.parse_iri_ref(tokenizer, context)?;
            self.expect_token(tokenizer, TokenKind::Dot)?;
            Ok(Some(TurtleStatement::BaseDecl(iri)))
        }
        _ => {
            let triples = self.parse_triple(tokenizer, context)?;
            self.expect_token(tokenizer, TokenKind::Dot)?;
            if triples.is_empty() {
                // NOTE(review): an empty result here is reported as
                // end-of-input, which makes parse_document stop even if
                // further statements follow — confirm this can only occur
                // when the document is genuinely exhausted.
                Ok(None)
            } else if triples.len() == 1 {
                Ok(Some(TurtleStatement::Triple(
                    triples
                        .into_iter()
                        .next()
                        .expect("iterator should have next element"),
                )))
            } else {
                Ok(Some(TurtleStatement::Triples(triples)))
            }
        }
    }
}
/// Parses one triple statement (subject plus predicate-object list),
/// honouring ';' (new predicate, same subject) and ',' (new object,
/// same predicate) abbreviations.
///
/// Triples generated as a side effect of nested constructs (collections,
/// blank-node property lists) are drained from `context.pending_triples`
/// into the result, keeping document order.
fn parse_triple(
    &self,
    tokenizer: &mut TurtleTokenizer,
    context: &mut TurtleParsingContext,
) -> TurtleResult<Vec<Triple>> {
    let subject = self.parse_subject(tokenizer, context)?;
    let mut triples = Vec::new();
    // Subject parsing may already have queued triples (e.g. "[ ... ]").
    triples.append(&mut context.pending_triples);
    loop {
        let (token, _) = tokenizer.peek_token()?;
        if matches!(token.kind, TokenKind::Dot) {
            break;
        }
        let predicate = self.parse_predicate(tokenizer, context)?;
        loop {
            let object = self.parse_object(tokenizer, context)?;
            // Flush nested triples before the containing triple itself.
            triples.append(&mut context.pending_triples);
            triples.push(Triple::new(subject.clone(), predicate.clone(), object));
            let (token, _) = tokenizer.peek_token()?;
            if matches!(token.kind, TokenKind::Comma) {
                // ',' — another object for the same predicate.
                let _ = tokenizer.consume_token();
                continue;
            } else {
                break;
            }
        }
        let (token, _) = tokenizer.peek_token()?;
        if matches!(token.kind, TokenKind::Semicolon) {
            let _ = tokenizer.consume_token();
            // Tolerate a trailing ';' immediately before the closing '.'.
            let (next_token, _) = tokenizer.peek_token()?;
            if matches!(next_token.kind, TokenKind::Dot) {
                break;
            }
            continue;
        } else {
            break;
        }
    }
    Ok(triples)
}
/// Parses the subject of a triple: an IRI reference, prefixed name,
/// blank node label, anonymous blank node (optionally with an inline
/// property list), or an RDF-star quoted triple.
fn parse_subject(
    &self,
    tokenizer: &mut TurtleTokenizer,
    context: &mut TurtleParsingContext,
) -> TurtleResult<Subject> {
    let (token, _) = tokenizer.peek_token()?;
    match &token.kind {
        TokenKind::IriRef(_) => {
            // parse_iri_ref consumes the token and resolves against the base IRI.
            let iri = self.parse_iri_ref(tokenizer, context)?;
            let named_node = context
                .create_named_node(&iri)
                .map_err(TurtleParseError::model)?;
            Ok(Subject::NamedNode(named_node))
        }
        TokenKind::PrefixedName(prefix, local) => {
            // Capture the position before consuming, for error reporting.
            let position = token.position;
            let _ = tokenizer.consume_token();
            let iri = self.resolve_prefixed_name(prefix, local, context, position)?;
            let named_node = context
                .create_named_node(&iri)
                .map_err(TurtleParseError::model)?;
            Ok(Subject::NamedNode(named_node))
        }
        TokenKind::BlankNodeLabel(label) => {
            let _ = tokenizer.consume_token();
            let blank_node = BlankNode::new(label).map_err(TurtleParseError::model)?;
            Ok(Subject::BlankNode(blank_node))
        }
        TokenKind::LeftBracket => {
            // '[' — anonymous blank node.
            let _ = tokenizer.consume_token();
            let id = context.generate_blank_node_id();
            let blank_node = BlankNode::new(&id).map_err(TurtleParseError::model)?;
            let (next_token, _) = tokenizer.peek_token()?;
            if matches!(next_token.kind, TokenKind::RightBracket) {
                // "[]" — plain anonymous node without properties.
                let _ = tokenizer.consume_token();
                return Ok(Subject::BlankNode(blank_node));
            }
            // Inline property list; its triples are queued on
            // context.pending_triples for the caller to collect.
            self.parse_blank_node_property_list(
                tokenizer,
                context,
                Subject::BlankNode(blank_node.clone()),
            )?;
            self.expect_token(tokenizer, TokenKind::RightBracket)?;
            Ok(Subject::BlankNode(blank_node))
        }
        TokenKind::DoubleLessThan => {
            // "<<" — RDF-star quoted triple in subject position.
            let _ = tokenizer.consume_token();
            let inner_subject = self.parse_subject(tokenizer, context)?;
            let inner_predicate = self.parse_predicate(tokenizer, context)?;
            let inner_object = self.parse_object(tokenizer, context)?;
            self.expect_token(tokenizer, TokenKind::DoubleGreaterThan)?;
            let inner_triple = Triple::new(inner_subject, inner_predicate, inner_object);
            let quoted_triple = QuotedTriple::new(inner_triple);
            Ok(Subject::QuotedTriple(Box::new(quoted_triple)))
        }
        _ => Err(TurtleParseError::syntax(TurtleSyntaxError::Generic {
            message: format!("Expected subject, found {:?}", token.kind),
            position: token.position,
        })),
    }
}
/// Parses the predicate of a triple: the `a` keyword (rdf:type), an IRI
/// reference, or a prefixed name.
fn parse_predicate(
    &self,
    tokenizer: &mut TurtleTokenizer,
    context: &mut TurtleParsingContext,
) -> TurtleResult<Predicate> {
    let (token, _) = tokenizer.peek_token()?;
    // Resolve the predicate to an absolute IRI, then build the node once.
    let iri = match &token.kind {
        TokenKind::A => {
            // The 'a' keyword abbreviates rdf:type.
            let _ = tokenizer.consume_token();
            "http://www.w3.org/1999/02/22-rdf-syntax-ns#type".to_string()
        }
        TokenKind::IriRef(_) => self.parse_iri_ref(tokenizer, context)?,
        TokenKind::PrefixedName(prefix, local) => {
            let position = token.position;
            let _ = tokenizer.consume_token();
            self.resolve_prefixed_name(prefix, local, context, position)?
        }
        _ => {
            return Err(TurtleParseError::syntax(TurtleSyntaxError::Generic {
                message: format!("Expected predicate, found {:?}", token.kind),
                position: token.position,
            }))
        }
    };
    let named_node = context
        .create_named_node(&iri)
        .map_err(TurtleParseError::model)?;
    Ok(Predicate::NamedNode(named_node))
}
/// Parses an object term: IRI reference, prefixed name, blank node,
/// literal (plain, language-tagged, typed, boolean, or numeric),
/// anonymous blank node, collection, or an RDF-star quoted triple.
fn parse_object(
    &self,
    tokenizer: &mut TurtleTokenizer,
    context: &mut TurtleParsingContext,
) -> TurtleResult<Object> {
    let (token, _) = tokenizer.peek_token()?;
    match &token.kind {
        TokenKind::IriRef(_) => {
            let iri = self.parse_iri_ref(tokenizer, context)?;
            let named_node = context
                .create_named_node(&iri)
                .map_err(TurtleParseError::model)?;
            Ok(Object::NamedNode(named_node))
        }
        TokenKind::PrefixedName(prefix, local) => {
            let position = token.position;
            let _ = tokenizer.consume_token();
            let iri = self.resolve_prefixed_name(prefix, local, context, position)?;
            let named_node = context
                .create_named_node(&iri)
                .map_err(TurtleParseError::model)?;
            Ok(Object::NamedNode(named_node))
        }
        TokenKind::BlankNodeLabel(label) => {
            let _ = tokenizer.consume_token();
            let blank_node = BlankNode::new(label).map_err(TurtleParseError::model)?;
            Ok(Object::BlankNode(blank_node))
        }
        TokenKind::StringLiteral(value) => {
            let _ = tokenizer.consume_token();
            // Look ahead for an optional language tag or "^^" datatype.
            let next_token = tokenizer.peek_token().ok();
            if let Some((token, _)) = next_token {
                match &token.kind {
                    TokenKind::LanguageTag(lang, direction) => {
                        let _ = tokenizer.consume_token();
                        #[cfg(feature = "rdf-12")]
                        let literal = if let Some(dir) = direction {
                            let base_direction = match dir.as_str() {
                                "ltr" => BaseDirection::Ltr,
                                "rtl" => BaseDirection::Rtl,
                                _ => {
                                    return Err(TurtleParseError::syntax(
                                        TurtleSyntaxError::Generic {
                                            message: format!("Invalid direction: {dir}"),
                                            position: token.position,
                                        },
                                    ));
                                }
                            };
                            Literal::new_directional_language_tagged_literal(
                                value,
                                lang,
                                base_direction,
                            )
                            .map_err(|e| {
                                TurtleParseError::syntax(TurtleSyntaxError::Generic {
                                    message: format!("Invalid directional language tag: {e}"),
                                    position: token.position,
                                })
                            })?
                        } else {
                            Literal::new_language_tagged_literal(value, lang).map_err(|e| {
                                // BUGFIX: report the language tag's actual position
                                // instead of TextPosition::default(), matching the
                                // sibling error branches above.
                                TurtleParseError::syntax(TurtleSyntaxError::Generic {
                                    message: format!("Invalid language tag: {e}"),
                                    position: token.position,
                                })
                            })?
                        };
                        #[cfg(not(feature = "rdf-12"))]
                        let literal = {
                            if direction.is_some() {
                                return Err(TurtleParseError::syntax(TurtleSyntaxError::Generic {
                                    message: "Directional language tags require the 'rdf-12' feature".to_string(),
                                    position: token.position,
                                }));
                            }
                            Literal::new_language_tagged_literal(value, lang).map_err(|e| {
                                // BUGFIX: as above — keep the real error position.
                                TurtleParseError::syntax(TurtleSyntaxError::Generic {
                                    message: format!("Invalid language tag: {e}"),
                                    position: token.position,
                                })
                            })?
                        };
                        Ok(Object::Literal(literal))
                    }
                    TokenKind::DataTypeAnnotation => {
                        // "^^" — a datatype IRI follows.
                        let _ = tokenizer.consume_token();
                        let (datatype_token, _) = tokenizer.peek_token()?;
                        let datatype_iri = match &datatype_token.kind {
                            TokenKind::IriRef(_) => self.parse_iri_ref(tokenizer, context)?,
                            TokenKind::PrefixedName(prefix, local) => {
                                let position = datatype_token.position;
                                let _ = tokenizer.consume_token();
                                self.resolve_prefixed_name(prefix, local, context, position)?
                            }
                            _ => {
                                return Err(TurtleParseError::syntax(TurtleSyntaxError::Generic {
                                    message: format!(
                                        "Expected IRI or prefixed name for datatype, found {:?}",
                                        datatype_token.kind
                                    ),
                                    position: datatype_token.position,
                                }));
                            }
                        };
                        let datatype = context
                            .create_named_node(&datatype_iri)
                            .map_err(TurtleParseError::model)?;
                        let literal = Literal::new_typed_literal(value, datatype);
                        Ok(Object::Literal(literal))
                    }
                    _ => {
                        // No annotation: a plain (simple) string literal.
                        let literal = Literal::new_simple_literal(value);
                        Ok(Object::Literal(literal))
                    }
                }
            } else {
                // Lookahead failed (e.g. end of input): simple literal.
                let literal = Literal::new_simple_literal(value);
                Ok(Object::Literal(literal))
            }
        }
        TokenKind::LeftBracket => {
            // '[' — anonymous blank node in object position.
            let _ = tokenizer.consume_token();
            let id = context.generate_blank_node_id();
            let blank_node = BlankNode::new(&id).map_err(TurtleParseError::model)?;
            let (next_token, _) = tokenizer.peek_token()?;
            if matches!(next_token.kind, TokenKind::RightBracket) {
                let _ = tokenizer.consume_token();
                return Ok(Object::BlankNode(blank_node));
            }
            // Property-list triples are queued on context.pending_triples.
            self.parse_blank_node_property_list(
                tokenizer,
                context,
                Subject::BlankNode(blank_node.clone()),
            )?;
            self.expect_token(tokenizer, TokenKind::RightBracket)?;
            Ok(Object::BlankNode(blank_node))
        }
        TokenKind::Boolean(value) => {
            let _ = tokenizer.consume_token();
            let xsd_boolean = context
                .create_named_node("http://www.w3.org/2001/XMLSchema#boolean")
                .map_err(TurtleParseError::model)?;
            let literal = Literal::new_typed_literal(value.to_string(), xsd_boolean);
            Ok(Object::Literal(literal))
        }
        TokenKind::Integer(value) => {
            let _ = tokenizer.consume_token();
            let xsd_integer = context
                .create_named_node("http://www.w3.org/2001/XMLSchema#integer")
                .map_err(TurtleParseError::model)?;
            let literal = Literal::new_typed_literal(value, xsd_integer);
            Ok(Object::Literal(literal))
        }
        TokenKind::Decimal(value) => {
            let _ = tokenizer.consume_token();
            let xsd_decimal = context
                .create_named_node("http://www.w3.org/2001/XMLSchema#decimal")
                .map_err(TurtleParseError::model)?;
            let literal = Literal::new_typed_literal(value, xsd_decimal);
            Ok(Object::Literal(literal))
        }
        TokenKind::Double(value) => {
            let _ = tokenizer.consume_token();
            let xsd_double = context
                .create_named_node("http://www.w3.org/2001/XMLSchema#double")
                .map_err(TurtleParseError::model)?;
            let literal = Literal::new_typed_literal(value, xsd_double);
            Ok(Object::Literal(literal))
        }
        TokenKind::LeftParen => {
            // '(' — RDF collection.
            self.parse_collection(tokenizer, context)
        }
        TokenKind::DoubleLessThan => {
            // "<<" — RDF-star quoted triple in object position.
            let _ = tokenizer.consume_token();
            let inner_subject = self.parse_subject(tokenizer, context)?;
            let inner_predicate = self.parse_predicate(tokenizer, context)?;
            let inner_object = self.parse_object(tokenizer, context)?;
            self.expect_token(tokenizer, TokenKind::DoubleGreaterThan)?;
            let inner_triple = Triple::new(inner_subject, inner_predicate, inner_object);
            let quoted_triple = QuotedTriple::new(inner_triple);
            Ok(Object::QuotedTriple(Box::new(quoted_triple)))
        }
        _ => Err(TurtleParseError::syntax(TurtleSyntaxError::Generic {
            message: format!("Expected object, found {:?}", token.kind),
            position: token.position,
        })),
    }
}
/// Parses an RDF collection "( item1 item2 ... )" into the standard
/// rdf:first / rdf:rest / rdf:nil linked-list encoding.
///
/// The list-structure triples are pushed onto `context.pending_triples`
/// (interleaved with any triples the items themselves generate); the
/// returned object is the head blank node, or rdf:nil for "()".
fn parse_collection(
    &self,
    tokenizer: &mut TurtleTokenizer,
    context: &mut TurtleParsingContext,
) -> TurtleResult<Object> {
    // Consume the opening '(' (already peeked by the caller).
    let _ = tokenizer.consume_token();
    let (next_token, _) = tokenizer.peek_token()?;
    if matches!(next_token.kind, TokenKind::RightParen) {
        // "()" — the empty collection is simply rdf:nil.
        let _ = tokenizer.consume_token();
        let rdf_nil = context
            .create_named_node("http://www.w3.org/1999/02/22-rdf-syntax-ns#nil")
            .map_err(TurtleParseError::model)?;
        return Ok(Object::NamedNode(rdf_nil));
    }
    let rdf_first = context
        .create_named_node("http://www.w3.org/1999/02/22-rdf-syntax-ns#first")
        .map_err(TurtleParseError::model)?;
    let rdf_rest = context
        .create_named_node("http://www.w3.org/1999/02/22-rdf-syntax-ns#rest")
        .map_err(TurtleParseError::model)?;
    let rdf_nil = context
        .create_named_node("http://www.w3.org/1999/02/22-rdf-syntax-ns#nil")
        .map_err(TurtleParseError::model)?;
    // One fresh blank node per list cell; the first doubles as the head.
    let first_id = context.generate_blank_node_id();
    let first_bn = BlankNode::new(&first_id).map_err(TurtleParseError::model)?;
    let mut current_bn = first_bn.clone();
    loop {
        // Each item may itself queue nested triples before its rdf:first link.
        let item = self.parse_object(tokenizer, context)?;
        let triple = Triple::new(
            Subject::BlankNode(current_bn.clone()),
            Predicate::NamedNode(rdf_first.clone()),
            item,
        );
        context.pending_triples.push(triple);
        let (next_token, _) = tokenizer.peek_token()?;
        if matches!(next_token.kind, TokenKind::RightParen) {
            // ')' — terminate the list with rdf:rest -> rdf:nil.
            let _ = tokenizer.consume_token();
            let triple = Triple::new(
                Subject::BlankNode(current_bn),
                Predicate::NamedNode(rdf_rest),
                Object::NamedNode(rdf_nil),
            );
            context.pending_triples.push(triple);
            break;
        } else {
            // Link this cell to a fresh one for the next item.
            let next_id = context.generate_blank_node_id();
            let next_bn = BlankNode::new(&next_id).map_err(TurtleParseError::model)?;
            let triple = Triple::new(
                Subject::BlankNode(current_bn),
                Predicate::NamedNode(rdf_rest.clone()),
                Object::BlankNode(next_bn.clone()),
            );
            context.pending_triples.push(triple);
            current_bn = next_bn;
        }
    }
    Ok(Object::BlankNode(first_bn))
}
/// Consumes an IRI-reference token and resolves it against the context's
/// base IRI, returning the absolute IRI string.
fn parse_iri_ref(
    &self,
    tokenizer: &mut TurtleTokenizer,
    context: &TurtleParsingContext,
) -> TurtleResult<String> {
    let token = tokenizer.consume_token()?;
    match &token.kind {
        TokenKind::IriRef(iri) => Ok(context.resolve_iri(iri)),
        other => Err(TurtleParseError::syntax(TurtleSyntaxError::Generic {
            message: format!("Expected IRI reference, found {:?}", other),
            position: token.position,
        })),
    }
}
/// Reads the prefix label of a `@prefix` directive.
///
/// A bare ':' (empty prefix) is detected but deliberately left in the
/// token stream; the caller consumes it afterwards.
fn parse_prefix_name(&self, tokenizer: &mut TurtleTokenizer) -> TurtleResult<String> {
    let (token, _) = tokenizer.peek_token()?;
    match &token.kind {
        TokenKind::Colon => Ok(String::new()),
        TokenKind::PrefixName(name) => {
            let name = name.clone();
            let _ = tokenizer.consume_token();
            Ok(name)
        }
        // The tokenizer may fuse "pfx:" into a prefixed name with an
        // empty local part; accept that form here too.
        TokenKind::PrefixedName(prefix, local) if local.is_empty() => {
            let prefix = prefix.clone();
            let _ = tokenizer.consume_token();
            Ok(prefix)
        }
        other => Err(TurtleParseError::syntax(TurtleSyntaxError::Generic {
            message: format!("Expected prefix name, found {:?}", other),
            position: token.position,
        })),
    }
}
/// Parses the predicate-object pairs inside "[ ... ]" for the given
/// blank-node subject, pushing each resulting triple onto
/// `context.pending_triples`. Stops (without consuming) at ']'.
fn parse_blank_node_property_list(
    &self,
    tokenizer: &mut TurtleTokenizer,
    context: &mut TurtleParsingContext,
    subject: Subject,
) -> TurtleResult<()> {
    loop {
        let (token, _) = tokenizer.peek_token()?;
        if matches!(token.kind, TokenKind::RightBracket) {
            break;
        }
        let predicate = self.parse_predicate(tokenizer, context)?;
        loop {
            let object = self.parse_object(tokenizer, context)?;
            let triple = Triple::new(subject.clone(), predicate.clone(), object);
            context.pending_triples.push(triple);
            let (token, _) = tokenizer.peek_token()?;
            if matches!(token.kind, TokenKind::Comma) {
                // ',' — another object for the same predicate.
                let _ = tokenizer.consume_token();
                continue;
            } else {
                break;
            }
        }
        let (token, _) = tokenizer.peek_token()?;
        if matches!(token.kind, TokenKind::Semicolon) {
            let _ = tokenizer.consume_token();
            // Tolerate a trailing ';' immediately before the closing ']'.
            let (next_token, _) = tokenizer.peek_token()?;
            if matches!(next_token.kind, TokenKind::RightBracket) {
                break;
            }
            continue;
        } else {
            break;
        }
    }
    Ok(())
}
/// Consumes the next token and verifies it has the expected kind.
///
/// Only the enum variant is compared (via `mem::discriminant`); any
/// payload carried by the variant is ignored.
fn expect_token(
    &self,
    tokenizer: &mut TurtleTokenizer,
    expected: TokenKind,
) -> TurtleResult<()> {
    let token = tokenizer.consume_token()?;
    if std::mem::discriminant(&token.kind) != std::mem::discriminant(&expected) {
        return Err(TurtleParseError::syntax(TurtleSyntaxError::Generic {
            message: format!("Expected {:?}, found {:?}", expected, token.kind),
            position: token.position,
        }));
    }
    Ok(())
}
/// Expands "prefix:local" to an absolute IRI using the context's prefix
/// table; errors with `UndefinedPrefix` when the prefix is unknown.
fn resolve_prefixed_name(
    &self,
    prefix: &str,
    local: &str,
    context: &TurtleParsingContext,
    position: TextPosition,
) -> TurtleResult<String> {
    match context.prefixes.get(prefix) {
        Some(namespace) => Ok(format!("{namespace}{local}")),
        None => Err(TurtleParseError::syntax(
            TurtleSyntaxError::UndefinedPrefix {
                prefix: prefix.to_string(),
                position,
            },
        )),
    }
}
}
impl Parser<Triple> for TurtleParser {
    /// Reads the entire input into memory and parses it as one document.
    fn parse<R: Read>(&self, mut reader: R) -> TurtleResult<Vec<Triple>> {
        let mut content = String::new();
        reader
            .read_to_string(&mut content)
            .map_err(TurtleParseError::io)?;
        self.parse_document(&content)
    }
    /// Returns an iterator over the parsed triples (or a single error item).
    ///
    /// BUGFIX: this previously collected `reader.lines()` and re-joined with
    /// '\n', which strips '\r' characters and the trailing newline — silently
    /// corrupting the content of multi-line (triple-quoted) string literals —
    /// and allocated one String per input line. Reading the stream verbatim
    /// preserves the document bytes exactly (BufRead implies Read).
    fn for_reader<R: BufRead>(&self, mut reader: R) -> Box<dyn Iterator<Item = TurtleResult<Triple>>> {
        let mut content = String::new();
        if let Err(e) = reader.read_to_string(&mut content) {
            return Box::new(std::iter::once(Err(TurtleParseError::io(e))));
        }
        match self.parse_document(&content) {
            Ok(triples) => Box::new(triples.into_iter().map(Ok)),
            Err(e) => Box::new(std::iter::once(Err(e))),
        }
    }
}
// Unit tests exercising the public parsing API end to end.
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Cursor;

    // Basic triple with a prefixed-name subject/predicate/object.
    #[test]
    fn test_parse_simple_triple_with_prefix() {
        let parser = TurtleParser::new();
        let input = r#"
@prefix ex: <http://example.org/> .
ex:subject ex:predicate ex:object .
"#;
        let triples = parser
            .parse_document(input)
            .expect("turtle parsing should succeed");
        assert_eq!(triples.len(), 1);
        if let Subject::NamedNode(s) = triples[0].subject() {
            assert_eq!(s.as_str(), "http://example.org/subject");
        } else {
            panic!("expected named node subject");
        }
    }

    // Plain (simple) string literal in object position.
    #[test]
    fn test_parse_string_literal() {
        let parser = TurtleParser::new();
        let input = r#"
@prefix ex: <http://example.org/> .
ex:alice ex:name "Alice" .
"#;
        let triples = parser
            .parse_document(input)
            .expect("turtle parsing should succeed");
        assert_eq!(triples.len(), 1);
        if let Object::Literal(lit) = triples[0].object() {
            assert_eq!(lit.value(), "Alice");
        } else {
            panic!("expected literal object");
        }
    }

    // "..."@en — language tag must be preserved on the literal.
    #[test]
    fn test_parse_language_tagged_literal() {
        let parser = TurtleParser::new();
        let input = r#"
@prefix ex: <http://example.org/> .
ex:alice ex:label "Alice"@en .
"#;
        let triples = parser
            .parse_document(input)
            .expect("turtle parsing should succeed");
        assert_eq!(triples.len(), 1);
        if let Object::Literal(lit) = triples[0].object() {
            assert_eq!(lit.value(), "Alice");
            assert_eq!(lit.language(), Some("en"));
        } else {
            panic!("expected language-tagged literal");
        }
    }

    // "..."^^xsd:integer — datatype annotation via prefixed name.
    #[test]
    fn test_parse_typed_literal() {
        let parser = TurtleParser::new();
        let input = r#"
@prefix ex: <http://example.org/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
ex:item ex:count "42"^^xsd:integer .
"#;
        let triples = parser
            .parse_document(input)
            .expect("turtle parsing should succeed");
        assert_eq!(triples.len(), 1);
        if let Object::Literal(lit) = triples[0].object() {
            assert_eq!(lit.value(), "42");
            assert_eq!(
                lit.datatype().as_str(),
                "http://www.w3.org/2001/XMLSchema#integer"
            );
        } else {
            panic!("expected typed literal");
        }
    }

    // Labeled blank node ("_:b1") as subject.
    #[test]
    fn test_parse_blank_node_subject() {
        let parser = TurtleParser::new();
        let input = r#"
@prefix ex: <http://example.org/> .
_:b1 ex:type ex:Thing .
"#;
        let triples = parser
            .parse_document(input)
            .expect("turtle parsing should succeed");
        assert_eq!(triples.len(), 1);
        assert!(
            matches!(triples[0].subject(), Subject::BlankNode(_)),
            "subject should be blank node"
        );
    }

    // ';' abbreviation: two predicates sharing one subject.
    #[test]
    fn test_parse_semicolon_abbreviated_predicates() {
        let parser = TurtleParser::new();
        let input = r#"
@prefix ex: <http://example.org/> .
ex:alice ex:name "Alice" ;
ex:age "30" .
"#;
        let triples = parser
            .parse_document(input)
            .expect("turtle parsing should succeed");
        assert_eq!(triples.len(), 2, "semicolon should produce two triples");
        let subject_0 = triples[0].subject().to_string();
        let subject_1 = triples[1].subject().to_string();
        assert_eq!(subject_0, subject_1, "both triples should share subject");
    }

    // ',' abbreviation: two objects sharing one predicate.
    #[test]
    fn test_parse_comma_abbreviated_objects() {
        let parser = TurtleParser::new();
        let input = r#"
@prefix ex: <http://example.org/> .
ex:alice ex:knows ex:bob , ex:charlie .
"#;
        let triples = parser
            .parse_document(input)
            .expect("turtle parsing should succeed");
        assert_eq!(triples.len(), 2, "comma should produce two triples");
    }

    // The 'a' keyword must expand to rdf:type.
    #[test]
    fn test_parse_rdf_type_shortcut() {
        let parser = TurtleParser::new();
        let input = r#"
@prefix ex: <http://example.org/> .
ex:alice a ex:Person .
"#;
        let triples = parser
            .parse_document(input)
            .expect("turtle parsing should succeed");
        assert_eq!(triples.len(), 1);
        if let Predicate::NamedNode(pred) = triples[0].predicate() {
            assert_eq!(
                pred.as_str(),
                "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
            );
        } else {
            panic!("expected named node predicate");
        }
    }

    // "[ p o ; p o ]" in object position produces nested triples.
    #[test]
    fn test_parse_blank_node_property_list() {
        let parser = TurtleParser::new();
        let input = r#"
@prefix ex: <http://example.org/> .
ex:alice ex:address [ ex:city "London" ; ex:country "UK" ] .
"#;
        let triples = parser
            .parse_document(input)
            .expect("turtle parsing should succeed");
        assert!(
            triples.len() >= 2,
            "blank node property list should produce multiple triples"
        );
    }

    // Multiple @prefix directives in one document.
    #[test]
    fn test_parse_multiple_prefix_declarations() {
        let parser = TurtleParser::new();
        let input = r#"
@prefix ex: <http://example.org/> .
@prefix foaf: <http://xmlns.com/foaf/0.1/> .
ex:alice foaf:name "Alice" .
ex:bob foaf:name "Bob" .
"#;
        let triples = parser
            .parse_document(input)
            .expect("turtle parsing should succeed");
        assert_eq!(triples.len(), 2);
    }

    // @base makes relative IRIs resolvable.
    #[test]
    fn test_parse_base_iri_declaration() {
        let parser = TurtleParser::new();
        let input = r#"
@base <http://example.org/> .
<alice> <knows> <bob> .
"#;
        let triples = parser
            .parse_document(input)
            .expect("turtle parsing should succeed");
        assert_eq!(triples.len(), 1);
    }

    // '#' comments (full-line and trailing) are skipped entirely.
    #[test]
    fn test_parse_comments_are_ignored() {
        let parser = TurtleParser::new();
        let input = r#"
# This is a comment
@prefix ex: <http://example.org/> . # inline comment
ex:s ex:p "o" . # another comment
"#;
        let triples = parser
            .parse_document(input)
            .expect("turtle parsing should succeed");
        assert_eq!(triples.len(), 1);
    }

    // Prefixed name in object position resolves to a named node.
    #[test]
    fn test_parse_iri_object() {
        let parser = TurtleParser::new();
        let input = r#"
@prefix ex: <http://example.org/> .
ex:alice ex:knows ex:bob .
"#;
        let triples = parser
            .parse_document(input)
            .expect("turtle parsing should succeed");
        assert_eq!(triples.len(), 1);
        if let Object::NamedNode(nn) = triples[0].object() {
            assert_eq!(nn.as_str(), "http://example.org/bob");
        } else {
            panic!("expected named node object");
        }
    }

    // A comments-only document parses to an empty result, not an error.
    #[test]
    fn test_parse_empty_document() {
        let parser = TurtleParser::new();
        let input = "# only comments\n\n";
        let triples = parser
            .parse_document(input)
            .expect("turtle parsing should succeed");
        assert!(triples.is_empty(), "empty document produces no triples");
    }

    // Several independent statements in one document.
    #[test]
    fn test_parse_multiple_triples() {
        let parser = TurtleParser::new();
        let input = r#"
@prefix ex: <http://example.org/> .
ex:a ex:p1 "v1" .
ex:b ex:p2 "v2" .
ex:c ex:p3 "v3" .
"#;
        let triples = parser
            .parse_document(input)
            .expect("turtle parsing should succeed");
        assert_eq!(triples.len(), 3);
    }

    // An undeclared prefix must fail in strict mode.
    #[test]
    fn test_parse_undefined_prefix_error() {
        let parser = TurtleParser::new();
        let input = r#"undeclared:subject ex:predicate "object" ."#;
        let result = parser.parse_document(input);
        assert!(result.is_err(), "undefined prefix should produce an error");
    }

    // Iterator-style Parser::for_reader interface.
    #[test]
    fn test_for_reader_interface() {
        let parser = TurtleParser::new();
        let input = r#"
@prefix ex: <http://example.org/> .
ex:s ex:p "o" .
"#;
        let triples: Vec<_> = parser
            .for_reader(Cursor::new(input))
            .collect::<Result<Vec<_>, _>>()
            .expect("for_reader should succeed");
        assert_eq!(triples.len(), 1);
    }

    // Base IRI supplied via the builder instead of @base.
    #[test]
    fn test_parse_with_base_iri_builder() {
        let parser = TurtleParser::new().with_base_iri("http://example.org/base/".to_string());
        let input = r#"<subject> <predicate> "object" ."#;
        let triples = parser
            .parse_document(input)
            .expect("turtle parsing should succeed");
        assert_eq!(triples.len(), 1);
    }

    // Prefix supplied via the builder instead of @prefix.
    #[test]
    fn test_parse_with_prefix_builder() {
        let parser = TurtleParser::new()
            .with_prefix("myns".to_string(), "http://myns.example.org/".to_string());
        let input = r#"myns:subject myns:predicate "object" ."#;
        let triples = parser
            .parse_document(input)
            .expect("turtle parsing should succeed");
        assert_eq!(triples.len(), 1);
        if let Subject::NamedNode(s) = triples[0].subject() {
            assert_eq!(s.as_str(), "http://myns.example.org/subject");
        } else {
            panic!("expected named node subject");
        }
    }

    // ';' immediately before '.' is tolerated (see parse_triple).
    #[test]
    fn test_parse_trailing_semicolon() {
        let parser = TurtleParser::new();
        let input = r#"
@prefix ex: <http://example.org/> .
ex:alice ex:name "Alice" ;
ex:age "30" ;
.
"#;
        let triples = parser
            .parse_document(input)
            .expect("trailing semicolon should be tolerated");
        assert_eq!(triples.len(), 2);
    }

    // Labeled blank node in object position.
    #[test]
    fn test_parse_blank_node_object() {
        let parser = TurtleParser::new();
        let input = r#"
@prefix ex: <http://example.org/> .
ex:alice ex:knows _:unknown .
"#;
        let triples = parser
            .parse_document(input)
            .expect("turtle parsing should succeed");
        assert_eq!(triples.len(), 1);
        assert!(
            matches!(triples[0].object(), Object::BlankNode(_)),
            "object should be blank node"
        );
    }

    // Byte-stream Parser::parse interface.
    #[test]
    fn test_parse_via_reader() {
        let parser = TurtleParser::new();
        let input = b"@prefix ex: <http://example.org/> .\nex:s ex:p \"o\" .\n";
        let triples = parser
            .parse(Cursor::new(input))
            .expect("parsing via reader should succeed");
        assert_eq!(triples.len(), 1);
    }
}