#[cfg(feature = "async")]
mod async_parser;
mod format_states;
use format_states::{TrigParserState, TurtleParserState};
#[cfg(feature = "async")]
pub use async_parser::{AsyncRdfSink, AsyncStreamingParser, MemoryAsyncSink, ParseProgress};
use crate::model::{
BlankNode, GraphName, Literal, NamedNode, Object, Predicate, Quad, Subject, Triple,
};
use crate::{OxirsError, Result};
/// RDF serialization formats understood by this module.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum RdfFormat {
    /// Turtle (`text/turtle`, `.ttl`).
    Turtle,
    /// N-Triples (`application/n-triples`, `.nt`).
    NTriples,
    /// TriG (`application/trig`, `.trig`).
    TriG,
    /// N-Quads (`application/n-quads`, `.nq`).
    NQuads,
    /// RDF/XML (`application/rdf+xml`, `.rdf`).
    RdfXml,
    /// JSON-LD (`application/ld+json`, `.jsonld`).
    JsonLd,
}

impl RdfFormat {
    /// Looks up the format for a file extension or short format name.
    ///
    /// The comparison is case-insensitive. The extension is expected without
    /// a leading dot; unknown names yield `None`.
    pub fn from_extension(ext: &str) -> Option<Self> {
        const KNOWN: &[(&str, RdfFormat)] = &[
            ("ttl", RdfFormat::Turtle),
            ("turtle", RdfFormat::Turtle),
            ("nt", RdfFormat::NTriples),
            ("ntriples", RdfFormat::NTriples),
            ("trig", RdfFormat::TriG),
            ("nq", RdfFormat::NQuads),
            ("nquads", RdfFormat::NQuads),
            ("rdf", RdfFormat::RdfXml),
            ("xml", RdfFormat::RdfXml),
            ("rdfxml", RdfFormat::RdfXml),
            ("jsonld", RdfFormat::JsonLd),
            ("json-ld", RdfFormat::JsonLd),
        ];
        let wanted = ext.to_lowercase();
        KNOWN
            .iter()
            .find(|(name, _)| *name == wanted)
            .map(|&(_, format)| format)
    }

    /// Returns the `(media type, canonical extension)` pair of the format.
    fn descriptor(self) -> (&'static str, &'static str) {
        match self {
            RdfFormat::Turtle => ("text/turtle", "ttl"),
            RdfFormat::NTriples => ("application/n-triples", "nt"),
            RdfFormat::TriG => ("application/trig", "trig"),
            RdfFormat::NQuads => ("application/n-quads", "nq"),
            RdfFormat::RdfXml => ("application/rdf+xml", "rdf"),
            RdfFormat::JsonLd => ("application/ld+json", "jsonld"),
        }
    }

    /// Returns the media type string of the format.
    pub fn media_type(&self) -> &'static str {
        self.descriptor().0
    }

    /// Returns the canonical file extension (without a dot) of the format.
    pub fn extension(&self) -> &'static str {
        self.descriptor().1
    }

    /// Reports whether this method treats the format as quad-capable
    /// (true for TriG and N-Quads only).
    pub fn supports_quads(&self) -> bool {
        match self {
            RdfFormat::TriG | RdfFormat::NQuads => true,
            RdfFormat::Turtle
            | RdfFormat::NTriples
            | RdfFormat::RdfXml
            | RdfFormat::JsonLd => false,
        }
    }
}
/// Options that control how a [`Parser`] processes its input.
///
/// `Default` yields no base IRI, strict error handling and no error limit.
#[derive(Debug, Clone, Default)]
pub struct ParserConfig {
/// Base IRI handed to the Turtle, TriG, RDF/XML and JSON-LD back ends.
pub base_iri: Option<String>,
/// When `true`, the line-based and JSON-LD parsers log recoverable errors
/// with `tracing::warn!` and keep going instead of returning the error; the
/// flag is also forwarded to the RDF/XML back end.
pub ignore_errors: bool,
/// Presumably a cap on the number of tolerated errors.
/// NOTE(review): nothing in this file reads this field — confirm where (or
/// whether) it is enforced.
pub max_errors: Option<usize>,
}
/// Parser for a single RDF serialization format.
///
/// The format is fixed at construction time and selects the back end used by
/// the `parse_*` methods.
#[derive(Debug, Clone)]
pub struct Parser {
// Serialization format this parser dispatches on.
format: RdfFormat,
// Base IRI and error-handling options.
config: ParserConfig,
}
impl Parser {
pub fn new(format: RdfFormat) -> Self {
Parser {
format,
config: ParserConfig::default(),
}
}
pub fn with_config(format: RdfFormat, config: ParserConfig) -> Self {
Parser { format, config }
}
pub fn with_base_iri(mut self, base_iri: impl Into<String>) -> Self {
self.config.base_iri = Some(base_iri.into());
self
}
pub fn with_error_tolerance(mut self, ignore_errors: bool) -> Self {
self.config.ignore_errors = ignore_errors;
self
}
/// Parses `data` and returns every quad produced, in emission order.
///
/// # Errors
/// Propagates any error returned by [`Parser::parse_str_with_handler`].
pub fn parse_str_to_quads(&self, data: &str) -> Result<Vec<Quad>> {
    let mut collected: Vec<Quad> = Vec::new();
    // The handler only accumulates; it never fails on its own.
    let sink = |quad: Quad| -> Result<()> {
        collected.push(quad);
        Ok(())
    };
    self.parse_str_with_handler(data, sink)?;
    Ok(collected)
}
/// Parses `data` and returns the triples of the default graph only.
///
/// Quads that belong to a named graph are dropped.
///
/// # Errors
/// Propagates any error returned by [`Parser::parse_str_to_quads`].
pub fn parse_str_to_triples(&self, data: &str) -> Result<Vec<Triple>> {
    let mut triples = Vec::new();
    for quad in self.parse_str_to_quads(data)? {
        if quad.is_default_graph() {
            triples.push(quad.to_triple());
        }
    }
    Ok(triples)
}
pub fn parse_str_with_handler<F>(&self, data: &str, handler: F) -> Result<()>
where
F: FnMut(Quad) -> Result<()>,
{
match self.format {
RdfFormat::Turtle => self.parse_turtle(data, handler),
RdfFormat::NTriples => self.parse_ntriples(data, handler),
RdfFormat::TriG => self.parse_trig(data, handler),
RdfFormat::NQuads => self.parse_nquads(data, handler),
RdfFormat::RdfXml => self.parse_rdfxml(data, handler),
RdfFormat::JsonLd => self.parse_jsonld(data, handler),
}
}
/// Parses a byte buffer that must contain UTF-8 text.
///
/// # Errors
/// Returns `OxirsError::Parse` when `data` is not valid UTF-8, otherwise
/// whatever [`Parser::parse_str_to_quads`] returns.
pub fn parse_bytes_to_quads(&self, data: &[u8]) -> Result<Vec<Quad>> {
    match std::str::from_utf8(data) {
        Ok(text) => self.parse_str_to_quads(text),
        Err(e) => Err(OxirsError::Parse(format!("Invalid UTF-8: {e}"))),
    }
}
fn parse_turtle<F>(&self, data: &str, mut handler: F) -> Result<()>
where
F: FnMut(Quad) -> Result<()>,
{
let mut parser = TurtleParserState::new(self.config.base_iri.as_deref());
for (line_num, line) in data.lines().enumerate() {
let line = line.trim();
if line.is_empty() || line.starts_with('#') {
continue;
}
match parser.parse_line(line) {
Ok(triples) => {
for triple in triples {
let quad = Quad::from_triple(triple);
handler(quad)?;
}
}
Err(e) => {
if self.config.ignore_errors {
tracing::warn!("Turtle parse error on line {}: {}", line_num + 1, e);
continue;
} else {
return Err(OxirsError::Parse(format!(
"Turtle parse error on line {}: {}",
line_num + 1,
e
)));
}
}
}
}
if let Some(triples) = parser.finalize()? {
for triple in triples {
let quad = Quad::from_triple(triple);
handler(quad)?;
}
}
Ok(())
}
fn parse_ntriples<F>(&self, data: &str, mut handler: F) -> Result<()>
where
F: FnMut(Quad) -> Result<()>,
{
for (line_num, line) in data.lines().enumerate() {
let line = line.trim();
if line.is_empty() || line.starts_with('#') {
continue;
}
match self.parse_ntriples_line(line) {
Ok(Some(quad)) => {
handler(quad)?;
}
Ok(None) => {
continue;
}
Err(e) => {
if self.config.ignore_errors {
tracing::warn!("Parse error on line {}: {}", line_num + 1, e);
continue;
} else {
return Err(OxirsError::Parse(format!(
"Parse error on line {}: {}",
line_num + 1,
e
)));
}
}
}
}
Ok(())
}
/// Parses one N-Triples statement into a default-graph quad.
///
/// Returns `Ok(None)` for blank lines and `#` comment lines.
///
/// # Errors
/// Returns `OxirsError::Parse` when the line does not end with `.`, does not
/// tokenize into exactly three terms, or contains an invalid term.
pub fn parse_ntriples_line(&self, line: &str) -> Result<Option<Quad>> {
    let statement = line.trim();
    if statement.is_empty() || statement.starts_with('#') {
        return Ok(None);
    }
    let body = match statement.strip_suffix('.') {
        Some(body) => body.trim(),
        None => return Err(OxirsError::Parse("Line must end with '.'".to_string())),
    };
    let tokens = self.tokenize_ntriples_line(body)?;
    match tokens.as_slice() {
        [subject, predicate, object] => {
            // Terms are validated left to right, so the first bad term wins.
            let triple = Triple::new(
                self.parse_subject(subject)?,
                self.parse_predicate(predicate)?,
                self.parse_object(object)?,
            );
            Ok(Some(Quad::from_triple(triple)))
        }
        other => Err(OxirsError::Parse(format!(
            "Expected 3 tokens (subject, predicate, object), found {}",
            other.len()
        ))),
    }
}
/// Splits an N-Triples/N-Quads statement (without its trailing `.`) into
/// whitespace-separated term tokens.
///
/// Whitespace inside a double-quoted literal does not split. Backslash
/// escapes inside a literal are copied through verbatim (backslash
/// included). A language tag (`@tag`) or datatype (`^^<iri>`) that directly
/// follows the closing quote is kept in the same token as the literal.
///
/// This function currently never returns `Err`; malformed literals are
/// reported later, when the tokens are turned into terms.
fn tokenize_ntriples_line(&self, line: &str) -> Result<Vec<String>> {
    let mut tokens = Vec::new();
    let mut current = String::new();
    let mut in_quotes = false;
    let mut escaped = false;
    let mut chars = line.chars().peekable();
    while let Some(c) = chars.next() {
        if escaped {
            // Keep the escape sequence as written.
            current.push('\\');
            current.push(c);
            escaped = false;
        } else if c == '\\' && in_quotes {
            escaped = true;
        } else if c == '"' {
            current.push(c);
            if in_quotes {
                // Closing quote: glue an immediately following suffix onto
                // the literal token.
                if let Some(at) = chars.next_if_eq(&'@') {
                    current.push(at);
                    while let Some(tag_char) =
                        chars.next_if(|ch| ch.is_alphanumeric() || *ch == '-')
                    {
                        current.push(tag_char);
                    }
                } else if chars.next_if_eq(&'^').is_some() {
                    // A lone `^` is consumed and dropped, as before; the
                    // resulting token is rejected when parsed as a literal.
                    if chars.next_if_eq(&'^').is_some() {
                        current.push_str("^^");
                        if chars.peek() == Some(&'<') {
                            // Copy the datatype IRI up to and including `>`.
                            for iri_char in chars.by_ref() {
                                current.push(iri_char);
                                if iri_char == '>' {
                                    break;
                                }
                            }
                        }
                    }
                }
                in_quotes = false;
            } else {
                in_quotes = true;
            }
        } else if c.is_whitespace() && !in_quotes {
            if !current.is_empty() {
                // Move the finished token out instead of cloning it.
                tokens.push(std::mem::take(&mut current));
            }
        } else {
            current.push(c);
        }
    }
    if !current.is_empty() {
        tokens.push(current);
    }
    Ok(tokens)
}
/// Converts a token into a subject term: `<iri>` or a `_:` blank node.
///
/// # Errors
/// Returns `OxirsError::Parse` for any other token shape, and propagates
/// errors from constructing the node.
fn parse_subject(&self, token: &str) -> Result<Subject> {
    let bracketed = token
        .strip_prefix('<')
        .and_then(|rest| rest.strip_suffix('>'));
    if let Some(iri) = bracketed {
        return Ok(Subject::NamedNode(NamedNode::new(iri)?));
    }
    if token.starts_with("_:") {
        // The blank node is built from the whole token, `_:` included.
        return Ok(Subject::BlankNode(BlankNode::new(token)?));
    }
    Err(OxirsError::Parse(format!(
        "Invalid subject: {token}. Must be IRI or blank node"
    )))
}
/// Converts a token into a predicate term, which must be an `<iri>`.
///
/// # Errors
/// Returns `OxirsError::Parse` when the token is not enclosed in `<`/`>`,
/// and propagates errors from constructing the named node.
fn parse_predicate(&self, token: &str) -> Result<Predicate> {
    let bracketed = token
        .strip_prefix('<')
        .and_then(|rest| rest.strip_suffix('>'));
    match bracketed {
        Some(iri) => Ok(Predicate::NamedNode(NamedNode::new(iri)?)),
        None => Err(OxirsError::Parse(format!(
            "Invalid predicate: {token}. Must be IRI"
        ))),
    }
}
/// Converts a token into an object term: `<iri>`, a `_:` blank node, or a
/// quoted literal.
///
/// # Errors
/// Returns `OxirsError::Parse` for any other token shape, and propagates
/// errors from constructing the term.
fn parse_object(&self, token: &str) -> Result<Object> {
    let bracketed = token
        .strip_prefix('<')
        .and_then(|rest| rest.strip_suffix('>'));
    if let Some(iri) = bracketed {
        return Ok(Object::NamedNode(NamedNode::new(iri)?));
    }
    if token.starts_with("_:") {
        return Ok(Object::BlankNode(BlankNode::new(token)?));
    }
    if token.starts_with('"') {
        return self.parse_literal(token);
    }
    Err(OxirsError::Parse(format!(
        "Invalid object: {token}. Must be IRI, blank node, or literal"
    )))
}
/// Parses a literal token of the form `"value"`, `"value"@lang` or
/// `"value"^^<datatype-iri>` into an [`Object::Literal`].
///
/// The closing quote is located by byte offset (`char_indices`), so values
/// containing multi-byte UTF-8 characters are sliced correctly. Using a
/// character index as a byte index here used to reject such literals or
/// panic on a non-boundary slice.
///
/// # Errors
/// Returns `OxirsError::Parse` when the token does not start with a quote,
/// has no unescaped closing quote, has an unrecognized suffix, or contains
/// an invalid escape sequence. Errors from constructing the literal or the
/// datatype IRI are propagated.
fn parse_literal(&self, token: &str) -> Result<Object> {
    let body = token
        .strip_prefix('"')
        .ok_or_else(|| OxirsError::Parse("Literal must start with quote".to_string()))?;

    // Byte offset (within `body`) of the first quote that is not escaped.
    let mut escaped = false;
    let mut closing = None;
    for (offset, ch) in body.char_indices() {
        if escaped {
            escaped = false;
        } else if ch == '\\' {
            escaped = true;
        } else if ch == '"' {
            closing = Some(offset);
            break;
        }
    }
    let closing =
        closing.ok_or_else(|| OxirsError::Parse("Unterminated literal".to_string()))?;

    let (raw_value, rest) = body.split_at(closing);
    // `rest` begins with the one-byte closing quote; skip it.
    let remaining = &rest[1..];
    let literal_value = self.unescape_literal_value(raw_value)?;

    if let Some(lang_tag) = remaining.strip_prefix('@') {
        let literal = Literal::new_lang(literal_value, lang_tag)?;
        Ok(Object::Literal(literal))
    } else if let Some(datatype_iri) = remaining
        .strip_prefix("^^<")
        .and_then(|iri| iri.strip_suffix('>'))
    {
        let datatype = NamedNode::new(datatype_iri)?;
        Ok(Object::Literal(Literal::new_typed(literal_value, datatype)))
    } else if remaining.is_empty() {
        Ok(Object::Literal(Literal::new(literal_value)))
    } else {
        Err(OxirsError::Parse(format!(
            "Invalid literal syntax: {token}"
        )))
    }
}
fn parse_trig<F>(&self, data: &str, mut handler: F) -> Result<()>
where
F: FnMut(Quad) -> Result<()>,
{
let mut parser = TrigParserState::new(self.config.base_iri.as_deref());
for (line_num, line) in data.lines().enumerate() {
let line = line.trim();
if line.is_empty() || line.starts_with('#') {
continue;
}
match parser.parse_line(line) {
Ok(quads) => {
for quad in quads {
handler(quad)?;
}
}
Err(e) => {
if self.config.ignore_errors {
tracing::warn!("TriG parse error on line {}: {}", line_num + 1, e);
continue;
} else {
return Err(OxirsError::Parse(format!(
"TriG parse error on line {}: {}",
line_num + 1,
e
)));
}
}
}
}
if let Some(quads) = parser.finalize()? {
for quad in quads {
handler(quad)?;
}
}
Ok(())
}
fn parse_nquads<F>(&self, data: &str, mut handler: F) -> Result<()>
where
F: FnMut(Quad) -> Result<()>,
{
for (line_num, line) in data.lines().enumerate() {
let line = line.trim();
if line.is_empty() || line.starts_with('#') {
continue;
}
match self.parse_nquads_line(line) {
Ok(Some(quad)) => {
handler(quad)?;
}
Ok(None) => {
continue;
}
Err(e) => {
if self.config.ignore_errors {
tracing::warn!("Parse error on line {}: {}", line_num + 1, e);
continue;
} else {
return Err(OxirsError::Parse(format!(
"Parse error on line {}: {}",
line_num + 1,
e
)));
}
}
}
}
Ok(())
}
pub fn parse_nquads_line(&self, line: &str) -> Result<Option<Quad>> {
let line = line.trim();
if line.is_empty() || line.starts_with('#') {
return Ok(None);
}
if !line.ends_with('.') {
return Err(OxirsError::Parse("Line must end with '.'".to_string()));
}
let line = &line[..line.len() - 1].trim();
let tokens = self.tokenize_ntriples_line(line)?;
if tokens.len() != 4 {
return Err(OxirsError::Parse(format!(
"Expected 4 tokens (subject, predicate, object, graph), found {}",
tokens.len()
)));
}
let subject = self.parse_subject(&tokens[0])?;
let predicate = self.parse_predicate(&tokens[1])?;
let object = self.parse_object(&tokens[2])?;
let graph_name = self.parse_graph_name(&tokens[3])?;
let quad = Quad::new(subject, predicate, object, graph_name);
Ok(Some(quad))
}
/// Converts a token into a graph name: `<iri>` or a `_:` blank node.
///
/// # Errors
/// Returns `OxirsError::Parse` for any other token shape, and propagates
/// errors from constructing the node.
fn parse_graph_name(&self, token: &str) -> Result<GraphName> {
    let bracketed = token
        .strip_prefix('<')
        .and_then(|rest| rest.strip_suffix('>'));
    if let Some(iri) = bracketed {
        return Ok(GraphName::NamedNode(NamedNode::new(iri)?));
    }
    if token.starts_with("_:") {
        return Ok(GraphName::BlankNode(BlankNode::new(token)?));
    }
    Err(OxirsError::Parse(format!(
        "Invalid graph name: {token}. Must be IRI or blank node"
    )))
}
/// Parses RDF/XML by delegating to `crate::rdfxml::wrapper::parse_rdfxml`
/// and forwards the returned quads to `handler` in order.
///
/// # Errors
/// Propagates errors from the RDF/XML back end and from `handler`.
fn parse_rdfxml<F>(&self, data: &str, handler: F) -> Result<()>
where
    F: FnMut(Quad) -> Result<()>,
{
    use crate::rdfxml::wrapper::parse_rdfxml;
    use std::io::Cursor;
    let quads = parse_rdfxml(
        Cursor::new(data.as_bytes()),
        self.config.base_iri.as_deref(),
        self.config.ignore_errors,
    )?;
    // Stops at the first handler error, exactly like an explicit loop with `?`.
    quads.into_iter().try_for_each(handler)
}
/// Parses JSON-LD with `JsonLdParser` and forwards each quad to `handler`.
///
/// The configured base IRI, if any, is applied to the back-end parser first.
///
/// # Errors
/// Returns `OxirsError::Parse` when the base IRI is rejected. An error
/// yielded while parsing aborts unless `ignore_errors` is set, in which case
/// it is logged and skipped. Errors from `handler` are always propagated.
fn parse_jsonld<F>(&self, data: &str, mut handler: F) -> Result<()>
where
    F: FnMut(Quad) -> Result<()>,
{
    use crate::jsonld::to_rdf::JsonLdParser;
    let mut parser = JsonLdParser::new();
    if let Some(base_iri) = self.config.base_iri.as_ref() {
        parser = parser
            .with_base_iri(base_iri.clone())
            .map_err(|e| OxirsError::Parse(format!("Invalid base IRI: {e}")))?;
    }
    for item in parser.for_slice(data.as_bytes()) {
        let quad = match item {
            Ok(quad) => quad,
            Err(e) if self.config.ignore_errors => {
                tracing::warn!("JSON-LD parse error: {}", e);
                continue;
            }
            Err(e) => {
                return Err(OxirsError::Parse(format!("JSON-LD parse error: {e}")));
            }
        };
        handler(quad)?;
    }
    Ok(())
}
fn unescape_literal_value(&self, value: &str) -> Result<String> {
let mut result = String::new();
let mut chars = value.chars();
while let Some(c) = chars.next() {
if c == '\\' {
match chars.next() {
Some('"') => result.push('"'),
Some('\\') => result.push('\\'),
Some('n') => result.push('\n'),
Some('r') => result.push('\r'),
Some('t') => result.push('\t'),
Some('u') => {
let hex_chars: String = chars.by_ref().take(4).collect();
if hex_chars.len() != 4 {
return Err(OxirsError::Parse(
"Invalid Unicode escape sequence \\uHHHH - expected 4 hex digits"
.to_string(),
));
}
let code_point = u32::from_str_radix(&hex_chars, 16).map_err(|_| {
OxirsError::Parse(
"Invalid hex digits in Unicode escape sequence".to_string(),
)
})?;
let unicode_char = char::from_u32(code_point).ok_or_else(|| {
OxirsError::Parse("Invalid Unicode code point".to_string())
})?;
result.push(unicode_char);
}
Some('U') => {
let hex_chars: String = chars.by_ref().take(8).collect();
if hex_chars.len() != 8 {
return Err(OxirsError::Parse(
"Invalid Unicode escape sequence \\UHHHHHHHH - expected 8 hex digits".to_string()
));
}
let code_point = u32::from_str_radix(&hex_chars, 16).map_err(|_| {
OxirsError::Parse(
"Invalid hex digits in Unicode escape sequence".to_string(),
)
})?;
let unicode_char = char::from_u32(code_point).ok_or_else(|| {
OxirsError::Parse("Invalid Unicode code point".to_string())
})?;
result.push(unicode_char);
}
Some(other) => {
return Err(OxirsError::Parse(format!(
"Invalid escape sequence \\{other}"
)));
}
None => {
return Err(OxirsError::Parse(
"Incomplete escape sequence at end of literal".to_string(),
));
}
}
} else {
result.push(c);
}
}
Ok(result)
}
}
/// Guesses the RDF serialization of `content` with cheap textual heuristics.
///
/// Checks run in this order and the first match wins:
/// 1. RDF/XML — content starts with `<?xml`, `<rdf:RDF` or `<RDF`.
/// 2. JSON-LD — content starts with `{` and mentions `@context` or `@type`.
/// 3. TriG — content contains both `{` and `}`.
/// 4. Turtle — content contains `@prefix`, `@base` or `;`.
/// 5. N-Quads / N-Triples — decided from the first non-blank, non-comment
///    line, which must end in a whitespace-separated `.`: five fields mean
///    N-Quads, three or more mean N-Triples.
///
/// The TriG test runs before the Turtle test because TriG documents normally
/// also contain `@prefix` and `;`; with the opposite order they were all
/// reported as Turtle.
///
/// Returns `None` when nothing matches. This is a heuristic, not a
/// validation: a match does not guarantee the content parses.
pub fn detect_format_from_content(content: &str) -> Option<RdfFormat> {
    let content = content.trim();

    let xml_markers = ["<?xml", "<rdf:RDF", "<RDF"];
    if xml_markers.iter().any(|marker| content.starts_with(*marker)) {
        return Some(RdfFormat::RdfXml);
    }
    if content.starts_with('{') && (content.contains("@context") || content.contains("@type")) {
        return Some(RdfFormat::JsonLd);
    }
    // Graph blocks are the distinguishing feature of TriG, so look for them
    // before the Turtle directives that TriG shares.
    if content.contains('{') && content.contains('}') {
        return Some(RdfFormat::TriG);
    }
    if content.contains("@prefix") || content.contains("@base") || content.contains(';') {
        return Some(RdfFormat::Turtle);
    }

    // Only the first significant line is inspected.
    let first_statement = content
        .lines()
        .map(str::trim)
        .find(|line| !line.is_empty() && !line.starts_with('#'))?;
    let parts: Vec<&str> = first_statement.split_whitespace().collect();
    match parts.as_slice() {
        [.., "."] if parts.len() == 5 => Some(RdfFormat::NQuads),
        [.., "."] if parts.len() >= 3 => Some(RdfFormat::NTriples),
        _ => None,
    }
}
#[cfg(test)]
mod tests {
use super::*;
use crate::model::graph::Graph;
/// Every known extension maps to its format; unknown ones map to `None`.
#[test]
fn test_format_detection_from_extension() {
    let expectations = [
        ("ttl", Some(RdfFormat::Turtle)),
        ("turtle", Some(RdfFormat::Turtle)),
        ("nt", Some(RdfFormat::NTriples)),
        ("ntriples", Some(RdfFormat::NTriples)),
        ("trig", Some(RdfFormat::TriG)),
        ("nq", Some(RdfFormat::NQuads)),
        ("rdf", Some(RdfFormat::RdfXml)),
        ("jsonld", Some(RdfFormat::JsonLd)),
        ("unknown", None),
    ];
    for (extension, expected) in expectations.iter() {
        assert_eq!(RdfFormat::from_extension(extension), *expected);
    }
}
/// Spot-checks media type, extension and quad support of a few formats.
#[test]
fn test_format_properties() {
    let (turtle, ntriples, trig) = (RdfFormat::Turtle, RdfFormat::NTriples, RdfFormat::TriG);
    assert_eq!(turtle.media_type(), "text/turtle");
    assert_eq!(ntriples.extension(), "nt");
    assert!(trig.supports_quads());
    assert!(!turtle.supports_quads());
}
/// Content sniffing recognizes RDF/XML, JSON-LD, Turtle and N-Triples samples.
#[test]
fn test_format_detection_from_content() {
    let samples = [
        ("<?xml version=\"1.0\"?>\n<rdf:RDF>", RdfFormat::RdfXml),
        (
            r#"{"@context": "http://example.org", "@type": "Person"}"#,
            RdfFormat::JsonLd,
        ),
        (
            "@prefix foaf: <http://xmlns.com/foaf/0.1/> .",
            RdfFormat::Turtle,
        ),
        (
            "<http://example.org/s> <http://example.org/p> \"object\" .",
            RdfFormat::NTriples,
        ),
    ];
    for (content, expected) in samples.iter() {
        assert_eq!(detect_format_from_content(content), Some(*expected));
    }
}
/// Parses three N-Triples statements (plain literal, typed literal, blank
/// node subject) and checks that each expected triple is present.
#[test]
fn test_ntriples_parsing_simple() {
let ntriples_data = r#"<http://example.org/alice> <http://xmlns.com/foaf/0.1/name> "Alice Smith" .
<http://example.org/alice> <http://xmlns.com/foaf/0.1/age> "30"^^<http://www.w3.org/2001/XMLSchema#integer> .
_:person1 <http://xmlns.com/foaf/0.1/knows> <http://example.org/bob> ."#;
let parser = Parser::new(RdfFormat::NTriples);
let result = parser.parse_str_to_quads(ntriples_data);
assert!(result.is_ok());
let quads = result.expect("should have value");
assert_eq!(quads.len(), 3);
// N-Triples carries no graph information, so everything is in the default graph.
for quad in &quads {
assert!(quad.is_default_graph());
}
let triples: Vec<_> = quads.into_iter().map(|q| q.to_triple()).collect();
// Statement 1: plain string literal.
let alice_iri = NamedNode::new("http://example.org/alice").expect("valid IRI");
let name_pred = NamedNode::new("http://xmlns.com/foaf/0.1/name").expect("valid IRI");
let name_literal = Literal::new("Alice Smith");
let expected_triple1 = Triple::new(alice_iri.clone(), name_pred, name_literal);
assert!(triples.contains(&expected_triple1));
// Statement 2: typed literal.
let age_pred = NamedNode::new("http://xmlns.com/foaf/0.1/age").expect("valid IRI");
let integer_type =
NamedNode::new("http://www.w3.org/2001/XMLSchema#integer").expect("valid IRI");
let age_literal = Literal::new_typed("30", integer_type);
let expected_triple2 = Triple::new(alice_iri, age_pred, age_literal);
assert!(triples.contains(&expected_triple2));
// Statement 3: blank node subject with an IRI object.
let blank_node = BlankNode::new("_:person1").expect("valid blank node id");
let knows_pred = NamedNode::new("http://xmlns.com/foaf/0.1/knows").expect("valid IRI");
let bob_iri = NamedNode::new("http://example.org/bob").expect("valid IRI");
let expected_triple3 = Triple::new(blank_node, knows_pred, bob_iri);
assert!(triples.contains(&expected_triple3));
}
/// A literal with an `@fr` suffix is parsed as a language-tagged string.
#[test]
fn test_ntriples_parsing_language_tag() {
let ntriples_data =
r#"<http://example.org/alice> <http://example.org/description> "Une personne"@fr ."#;
let parser = Parser::new(RdfFormat::NTriples);
let result = parser.parse_str_to_quads(ntriples_data);
assert!(result.is_ok());
let quads = result.expect("should have value");
assert_eq!(quads.len(), 1);
let triple = quads[0].to_triple();
if let Object::Literal(literal) = triple.object() {
assert_eq!(literal.value(), "Une personne");
assert_eq!(literal.language(), Some("fr"));
assert!(literal.is_lang_string());
} else {
panic!("Expected literal object");
}
}
/// Escaped quotes and `\n` inside a literal are decoded into the value.
#[test]
fn test_ntriples_parsing_escaped_literals() {
let ntriples_data = r#"<http://example.org/test> <http://example.org/desc> "Text with \"quotes\" and \n newlines" ."#;
let parser = Parser::new(RdfFormat::NTriples);
let result = parser.parse_str_to_quads(ntriples_data);
// Print the error first so it shows up in the captured test output.
if let Err(e) = &result {
println!("Parse error: {e}");
}
assert!(result.is_ok(), "Parse failed: {result:?}");
let quads = result.expect("should have value");
assert_eq!(quads.len(), 1);
let triple = quads[0].to_triple();
if let Object::Literal(literal) = triple.object() {
assert!(literal.value().contains("\"quotes\""));
assert!(literal.value().contains("\n"));
} else {
panic!("Expected literal object");
}
}
/// Comment lines and empty lines are skipped; only the two statements count.
#[test]
fn test_ntriples_parsing_comments_and_empty_lines() {
let ntriples_data = r#"
# This is a comment
<http://example.org/alice> <http://xmlns.com/foaf/0.1/name> "Alice Smith" .
# Another comment
<http://example.org/bob> <http://xmlns.com/foaf/0.1/name> "Bob Jones" .
"#;
let parser = Parser::new(RdfFormat::NTriples);
let result = parser.parse_str_to_quads(ntriples_data);
assert!(result.is_ok());
let quads = result.expect("should have value");
assert_eq!(quads.len(), 2);
}
/// Strict parsing fails on a malformed line; tolerant parsing skips it and
/// keeps the valid statements.
#[test]
fn test_ntriples_parsing_error_handling() {
let invalid_data = "invalid ntriples data";
let parser = Parser::new(RdfFormat::NTriples);
let result = parser.parse_str_to_quads(invalid_data);
assert!(result.is_err());
// Two valid statements with one malformed line in between.
let mixed_data = r#"<http://example.org/valid> <http://example.org/pred> "Valid triple" .
invalid line here
<http://example.org/valid2> <http://example.org/pred> "Another valid triple" ."#;
let parser_strict = Parser::new(RdfFormat::NTriples);
let result_strict = parser_strict.parse_str_to_quads(mixed_data);
assert!(result_strict.is_err());
let parser_tolerant = Parser::new(RdfFormat::NTriples).with_error_tolerance(true);
let result_tolerant = parser_tolerant.parse_str_to_quads(mixed_data);
assert!(result_tolerant.is_ok());
let quads = result_tolerant.expect("tolerant parse should succeed");
assert_eq!(quads.len(), 2); }
/// Parses three N-Quads statements and checks that the first one lands in a
/// named graph identified by an IRI.
#[test]
fn test_nquads_parsing() {
let nquads_data = r#"<http://example.org/alice> <http://xmlns.com/foaf/0.1/name> "Alice Smith" <http://example.org/graph1> .
<http://example.org/alice> <http://xmlns.com/foaf/0.1/age> "30"^^<http://www.w3.org/2001/XMLSchema#integer> <http://example.org/graph2> .
_:person1 <http://xmlns.com/foaf/0.1/knows> <http://example.org/bob> _:graph1 ."#;
let parser = Parser::new(RdfFormat::NQuads);
let result = parser.parse_str_to_quads(nquads_data);
assert!(result.is_ok());
let quads = result.expect("should have value");
assert_eq!(quads.len(), 3);
let first_quad = &quads[0];
assert!(!first_quad.is_default_graph());
if let GraphName::NamedNode(graph_name) = first_quad.graph_name() {
assert!(graph_name.as_str().contains("example.org"));
} else {
panic!("Expected named graph");
}
}
/// Two prefix declarations plus three statements yield three default-graph quads.
#[test]
fn test_turtle_parsing_basic() {
let turtle_data = r#"@prefix foaf: <http://xmlns.com/foaf/0.1/> .
@prefix ex: <http://example.org/> .
ex:alice foaf:name "Alice Smith" .
ex:alice foaf:age "30"^^<http://www.w3.org/2001/XMLSchema#integer> .
ex:alice foaf:knows ex:bob ."#;
let parser = Parser::new(RdfFormat::Turtle);
let result = parser.parse_str_to_quads(turtle_data);
assert!(result.is_ok());
let quads = result.expect("should have value");
assert_eq!(quads.len(), 3);
for quad in &quads {
assert!(quad.is_default_graph());
}
}
/// Prefixed names are expanded and the `a` keyword becomes `rdf:type`.
#[test]
fn test_turtle_parsing_prefixes() {
let turtle_data = r#"@prefix foaf: <http://xmlns.com/foaf/0.1/> .
foaf:Person a foaf:Person ."#;
let parser = Parser::new(RdfFormat::Turtle);
let result = parser.parse_str_to_quads(turtle_data);
assert!(result.is_ok());
let quads = result.expect("should have value");
assert_eq!(quads.len(), 1);
let triple = quads[0].to_triple();
if let Subject::NamedNode(subj) = triple.subject() {
assert!(subj.as_str().contains("xmlns.com/foaf"));
} else {
panic!("Expected named node subject");
}
if let Predicate::NamedNode(pred) = triple.predicate() {
assert!(pred.as_str().contains("rdf-syntax-ns#type"));
} else {
panic!("Expected named node predicate");
}
}
/// A `;`-separated predicate list produces two triples with the same subject.
#[test]
fn test_turtle_parsing_abbreviated_syntax() {
let turtle_data = r#"@prefix ex: <http://example.org/> .
@prefix foaf: <http://xmlns.com/foaf/0.1/> .
ex:alice foaf:name "Alice" ;
foaf:age "30" ."#;
let parser = Parser::new(RdfFormat::Turtle);
let result = parser.parse_str_to_quads(turtle_data);
assert!(result.is_ok());
let quads = result.expect("should have value");
assert_eq!(quads.len(), 2);
let subjects: Vec<_> = quads
.iter()
.map(|q| q.to_triple().subject().clone())
.collect();
assert_eq!(subjects[0], subjects[1]);
}
/// Relative IRIs are resolved against the `@base` declared in the document.
#[test]
fn test_turtle_parsing_base_iri() {
let turtle_data = r#"@base <http://example.org/> .
<alice> <knows> <bob> ."#;
let parser = Parser::new(RdfFormat::Turtle);
let result = parser.parse_str_to_quads(turtle_data);
assert!(result.is_ok());
let quads = result.expect("should have value");
assert_eq!(quads.len(), 1);
let triple = quads[0].to_triple();
if let Subject::NamedNode(subj) = triple.subject() {
assert!(subj.as_str().contains("example.org"));
} else {
panic!("Expected named node subject");
}
}
/// Turtle input with one language-tagged and one typed literal yields both
/// kinds of literal.
#[test]
fn test_turtle_parsing_literals() {
let turtle_data = r#"@prefix ex: <http://example.org/> .
ex:alice ex:name "Alice"@en .
ex:alice ex:age "30"^^<http://www.w3.org/2001/XMLSchema#integer> ."#;
let parser = Parser::new(RdfFormat::Turtle);
let result = parser.parse_str_to_quads(turtle_data);
assert!(result.is_ok());
let quads = result.expect("should have value");
assert_eq!(quads.len(), 2);
let triples: Vec<_> = quads.into_iter().map(|q| q.to_triple()).collect();
let mut found_lang_literal = false;
let mut found_typed_literal = false;
for triple in triples {
if let Object::Literal(literal) = triple.object() {
if literal.language().is_some() {
found_lang_literal = true;
assert_eq!(literal.language(), Some("en"));
} else {
// Anything other than xsd:string / rdf:langString counts as explicitly typed.
let datatype = literal.datatype();
if datatype.as_str() != "http://www.w3.org/2001/XMLSchema#string"
&& datatype.as_str()
!= "http://www.w3.org/1999/02/22-rdf-syntax-ns#langString"
{
found_typed_literal = true;
assert!(
datatype.as_str().contains("integer"),
"Expected integer datatype but got: {}",
datatype.as_str()
);
}
}
}
}
assert!(found_lang_literal);
assert!(found_typed_literal);
}
/// Serializes a small graph to N-Triples, parses it back and checks that the
/// parsed graph has the same size and contains every original triple.
#[test]
fn test_parser_round_trip() {
use crate::serializer::Serializer;
// Build a graph with a plain, a typed and a language-tagged literal.
let mut original_graph = Graph::new();
let alice = NamedNode::new("http://example.org/alice").expect("valid IRI");
let name_pred = NamedNode::new("http://xmlns.com/foaf/0.1/name").expect("valid IRI");
let name_literal = Literal::new("Alice Smith");
original_graph.insert(Triple::new(alice.clone(), name_pred, name_literal));
let age_pred = NamedNode::new("http://xmlns.com/foaf/0.1/age").expect("valid IRI");
let age_literal = Literal::new_typed("30", crate::vocab::xsd::INTEGER.clone());
original_graph.insert(Triple::new(alice.clone(), age_pred, age_literal));
let desc_pred = NamedNode::new("http://example.org/description").expect("valid IRI");
let desc_literal =
Literal::new_lang("Une personne", "fr").expect("construction should succeed");
original_graph.insert(Triple::new(alice, desc_pred, desc_literal));
// Serialize, then parse the output again.
let serializer = Serializer::new(RdfFormat::NTriples);
let ntriples = serializer
.serialize_graph(&original_graph)
.expect("operation should succeed");
let parser = Parser::new(RdfFormat::NTriples);
let quads = parser
.parse_str_to_quads(&ntriples)
.expect("operation should succeed");
let parsed_graph = Graph::from_iter(quads.into_iter().map(|q| q.to_triple()));
assert_eq!(original_graph.len(), parsed_graph.len());
for triple in original_graph.iter() {
assert!(
parsed_graph.contains(triple),
"Parsed graph missing triple: {triple}"
);
}
}
/// Parses a TriG document with one anonymous (default) graph block and one
/// named graph block, then checks quad counts and graph membership.
#[test]
fn test_trig_parser() {
let trig_data = r#"
@prefix ex: <http://example.org/> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
# Default graph
{
ex:alice rdf:type ex:Person .
ex:alice ex:name "Alice" .
}
# Named graph
ex:graph1 {
ex:bob rdf:type ex:Person .
ex:bob ex:name "Bob" .
ex:bob ex:age "30" .
}
"#;
let parser = Parser::new(RdfFormat::TriG);
let quads = parser
.parse_str_to_quads(trig_data)
.expect("operation should succeed");
// The counts are lower bounds, so extra quads do not fail the test.
assert!(
quads.len() >= 5,
"Should parse at least 5 quads, got {}",
quads.len()
);
let default_graph_count = quads.iter().filter(|q| q.is_default_graph()).count();
let named_graph_count = quads.len() - default_graph_count;
assert!(
default_graph_count >= 2,
"Should have at least 2 default graph quads, got {default_graph_count}"
);
assert!(
named_graph_count >= 3,
"Should have at least 3 named graph quads, got {named_graph_count}"
);
let alice_uri = "http://example.org/alice";
let bob_uri = "http://example.org/bob";
let person_uri = "http://example.org/Person";
let alice_type_found = quads.iter().any(|q| {
q.is_default_graph()
&& q.subject().to_string().contains(alice_uri)
&& q.object().to_string().contains(person_uri)
});
assert!(
alice_type_found,
"Should find Alice type assertion in default graph"
);
let bob_in_named_graph = quads
.iter()
.any(|q| !q.is_default_graph() && q.subject().to_string().contains(bob_uri));
assert!(
bob_in_named_graph,
"Should find Bob statements in named graph"
);
}
/// Prefixed names in TriG input are expanded to full IRIs.
#[test]
fn test_trig_parser_prefixes() {
let trig_data = r#"
@prefix ex: <http://example.org/> .
@prefix foaf: <http://xmlns.com/foaf/0.1/> .
ex:person1 foaf:name "John Doe" .
"#;
let parser = Parser::new(RdfFormat::TriG);
let quads = parser
.parse_str_to_quads(trig_data)
.expect("operation should succeed");
assert!(!quads.is_empty(), "Should parse prefixed statements");
let expanded_found = quads.iter().any(|q| {
q.subject()
.to_string()
.contains("http://example.org/person1")
&& q.predicate()
.to_string()
.contains("http://xmlns.com/foaf/0.1/name")
});
assert!(expanded_found, "Should expand prefixes correctly");
}
/// Smoke test for the JSON-LD back end with an inline `@context`.
///
/// NOTE(review): a parse error is only printed, so this test fails only when
/// parsing succeeds with zero quads — confirm this leniency is still wanted.
#[test]
fn test_jsonld_parser() {
let jsonld_data = r#"{
"@context": {
"name": "http://xmlns.com/foaf/0.1/name",
"Person": "http://schema.org/Person"
},
"@type": "Person",
"@id": "http://example.org/john",
"name": "John Doe"
}"#;
let parser = Parser::new(RdfFormat::JsonLd);
let result = parser.parse_str_to_quads(jsonld_data);
match result {
Ok(quads) => {
println!("JSON-LD parsed {} quads:", quads.len());
for quad in &quads {
println!(" {quad}");
}
assert!(!quads.is_empty(), "Should parse some quads from JSON-LD");
}
Err(e) => {
println!("JSON-LD parsing error (expected during development): {e}");
}
}
}
/// Smoke test for JSON-LD input whose `@context` is a remote URL string.
///
/// NOTE(review): this test contains no assertions; it only checks that
/// parsing does not panic. Both outcomes are printed.
#[test]
fn test_jsonld_parser_simple() {
let jsonld_data = r#"{
"@context": "http://schema.org/",
"@type": "Person",
"name": "Alice"
}"#;
let parser = Parser::new(RdfFormat::JsonLd);
let result = parser.parse_str_to_quads(jsonld_data);
match result {
Ok(quads) => {
println!("Simple JSON-LD parsed {} quads", quads.len());
}
Err(e) => {
println!("Simple JSON-LD parsing error: {e}");
}
}
}
}