use crate::error::{TextPosition, TurtleParseError, TurtleResult, TurtleSyntaxError};
use crate::toolkit::{FormattedWriter, Parser, SerializationConfig, Serializer};
use oxirs_core::model::{BlankNode, Literal, NamedNode, Object, Predicate, Subject, Triple};
use std::io::{BufRead, BufReader, Read, Write};
/// Line-oriented parser for N-Triples documents.
///
/// Each input line is handled independently by `parse_line`; the `Parser`
/// implementation and the streaming iterator both drive it line by line.
#[derive(Debug, Clone)]
pub struct NTriplesParser {
/// Error-tolerance switch.
///
/// When `false`, the first line that fails to parse aborts parsing with
/// that error. When `true`, `Parser::parse` collects per-line errors and
/// only reports their count on stderr, and the streaming iterator skips
/// failing lines silently.
pub lenient: bool,
}
impl Default for NTriplesParser {
fn default() -> Self {
Self::new()
}
}
impl NTriplesParser {
/// Creates a strict parser: the first malformed line is reported as an error.
pub fn new() -> Self {
    let lenient = false;
    Self { lenient }
}
/// Creates a lenient parser: malformed lines are skipped instead of aborting.
pub fn new_lenient() -> Self {
    let lenient = true;
    Self { lenient }
}
/// Removes a trailing `# comment` from `line`, if there is one.
///
/// A `#` only starts a comment when it appears outside a quoted literal and
/// outside an `<...>` IRI. Backslash escapes inside a literal are honoured so
/// an escaped quote does not end the literal. When a comment is found, the
/// text before it is returned with trailing whitespace removed; otherwise
/// `line` is returned unchanged.
fn strip_inline_comment<'a>(&self, line: &'a str) -> &'a str {
    let mut inside_literal = false;
    let mut inside_iri = false;
    let mut skip_next = false;

    let comment_start = line.char_indices().find_map(|(offset, ch)| {
        // The character right after a backslash (inside a literal) is inert.
        if skip_next {
            skip_next = false;
            return None;
        }
        if inside_literal {
            match ch {
                '\\' => skip_next = true,
                '"' => inside_literal = false,
                _ => {}
            }
            return None;
        }
        match ch {
            '"' => inside_literal = true,
            '<' => inside_iri = true,
            '>' => inside_iri = false,
            '#' if !inside_iri => return Some(offset),
            _ => {}
        }
        None
    });

    match comment_start {
        Some(offset) => line[..offset].trim_end(),
        None => line,
    }
}
/// Parses a single N-Triples line.
///
/// Returns `Ok(None)` for blank lines and for lines that consist only of a
/// comment. Otherwise the line (after removing any inline comment) must end
/// with `.` and contain exactly three whitespace-separated terms, which are
/// parsed as subject, predicate and object in that order.
///
/// `line_number` is only used to build error positions.
///
/// # Errors
///
/// Returns a syntax error when the terminating `.` is missing or the number
/// of terms is not three, and propagates any error from tokenizing or from
/// parsing the individual terms.
pub fn parse_line(&self, line: &str, line_number: usize) -> TurtleResult<Option<Triple>> {
    let trimmed = line.trim();
    if trimmed.is_empty() || trimmed.starts_with('#') {
        return Ok(None);
    }

    let statement = self.strip_inline_comment(trimmed);
    let body = match statement.strip_suffix('.') {
        Some(without_dot) => without_dot.trim(),
        None => {
            return Err(TurtleParseError::syntax(TurtleSyntaxError::Generic {
                message: "N-Triples line must end with '.'".to_string(),
                position: TextPosition::new(line_number, statement.len(), 0),
            }));
        }
    };

    let tokens = self.tokenize_line(body, line_number)?;
    match tokens.as_slice() {
        [subject_token, predicate_token, object_token] => {
            let subject = self.parse_subject(subject_token, line_number)?;
            let predicate = self.parse_predicate(predicate_token, line_number)?;
            let object = self.parse_object(object_token, line_number)?;
            Ok(Some(Triple::new(subject, predicate, object)))
        }
        unexpected => Err(TurtleParseError::syntax(TurtleSyntaxError::Generic {
            message: format!(
                "Expected 3 tokens (subject predicate object), found {}",
                unexpected.len()
            ),
            position: TextPosition::new(line_number, 1, 0),
        })),
    }
}
/// Splits the body of a statement into whitespace-separated terms.
///
/// Spaces and tabs separate terms unless they occur inside a double-quoted
/// literal. Quotes and backslash escapes are kept verbatim in the resulting
/// tokens; the character following a backslash inside a literal never ends
/// the literal.
///
/// # Errors
///
/// Returns a syntax error (positioned at the end of `line`) when a quoted
/// literal is still open at the end of the input.
fn tokenize_line(&self, line: &str, line_number: usize) -> TurtleResult<Vec<String>> {
    let mut tokens = Vec::new();
    let mut current_token = String::new();
    let mut in_quotes = false;
    let mut escaped = false;

    // Byte offsets are never needed here, so iterate over plain chars.
    for ch in line.chars() {
        if escaped {
            current_token.push(ch);
            escaped = false;
            continue;
        }
        match ch {
            '\\' if in_quotes => {
                escaped = true;
                current_token.push(ch);
            }
            '"' => {
                in_quotes = !in_quotes;
                current_token.push(ch);
            }
            ' ' | '\t' if !in_quotes => {
                if !current_token.is_empty() {
                    // Move the finished token out instead of cloning it and
                    // clearing the buffer afterwards.
                    tokens.push(std::mem::take(&mut current_token));
                }
            }
            _ => current_token.push(ch),
        }
    }

    if in_quotes {
        return Err(TurtleParseError::syntax(TurtleSyntaxError::Generic {
            message: "Unterminated string literal".to_string(),
            position: TextPosition::new(line_number, line.len(), 0),
        }));
    }
    if !current_token.is_empty() {
        tokens.push(current_token);
    }
    Ok(tokens)
}
/// Interprets `token` as a triple subject.
///
/// Accepts an IRI written as `<...>` or a blank node written as `_:label`.
///
/// # Errors
///
/// Returns a model error when the IRI or blank node label is rejected by the
/// model constructors, and a syntax error for any other token shape.
fn parse_subject(&self, token: &str, line_number: usize) -> TurtleResult<Subject> {
    let bracketed = token
        .strip_prefix('<')
        .and_then(|rest| rest.strip_suffix('>'));
    if let Some(iri) = bracketed {
        let node = NamedNode::new(iri).map_err(TurtleParseError::model)?;
        return Ok(Subject::NamedNode(node));
    }
    if let Some(label) = token.strip_prefix("_:") {
        let node = BlankNode::new(label).map_err(TurtleParseError::model)?;
        return Ok(Subject::BlankNode(node));
    }
    Err(TurtleParseError::syntax(TurtleSyntaxError::Generic {
        message: format!("Invalid subject: {token}"),
        position: TextPosition::new(line_number, 1, 0),
    }))
}
/// Interprets `token` as a triple predicate, which must be an IRI in `<...>`.
///
/// # Errors
///
/// Returns a model error when the IRI is rejected by `NamedNode::new`, and a
/// syntax error when the token is not wrapped in angle brackets.
fn parse_predicate(&self, token: &str, line_number: usize) -> TurtleResult<Predicate> {
    let bracketed = token
        .strip_prefix('<')
        .and_then(|rest| rest.strip_suffix('>'));
    match bracketed {
        Some(iri) => {
            let node = NamedNode::new(iri).map_err(TurtleParseError::model)?;
            Ok(Predicate::NamedNode(node))
        }
        None => Err(TurtleParseError::syntax(TurtleSyntaxError::Generic {
            message: format!("Invalid predicate: {token}"),
            position: TextPosition::new(line_number, 1, 0),
        })),
    }
}
/// Interprets `token` as a triple object.
///
/// Tried in order: an IRI in `<...>`, a blank node `_:label`, then a literal
/// starting with `"` (delegated to `parse_literal`).
///
/// # Errors
///
/// Returns a model error when a node constructor rejects its input, whatever
/// `parse_literal` reports for literals, and a syntax error for any other
/// token shape.
fn parse_object(&self, token: &str, line_number: usize) -> TurtleResult<Object> {
    let bracketed = token
        .strip_prefix('<')
        .and_then(|rest| rest.strip_suffix('>'));
    if let Some(iri) = bracketed {
        let node = NamedNode::new(iri).map_err(TurtleParseError::model)?;
        return Ok(Object::NamedNode(node));
    }
    if let Some(label) = token.strip_prefix("_:") {
        let node = BlankNode::new(label).map_err(TurtleParseError::model)?;
        return Ok(Object::BlankNode(node));
    }
    if token.starts_with('"') {
        return self.parse_literal(token, line_number);
    }
    Err(TurtleParseError::syntax(TurtleSyntaxError::Generic {
        message: format!("Invalid object: {token}"),
        position: TextPosition::new(line_number, 1, 0),
    }))
}
/// Parses a literal token of the form `"lexical"`, `"lexical"@lang` or
/// `"lexical"^^<datatype-iri>`.
///
/// The closing quote is the first unescaped `"` after the opening one; the
/// text between the quotes is unescaped with `unescape_string`, and whatever
/// follows the closing quote selects the kind of literal.
///
/// # Errors
///
/// Returns a syntax error when the token does not start with a quote, has no
/// closing quote, or carries an unrecognised suffix. Errors from unescaping
/// and from the literal / datatype constructors are propagated.
fn parse_literal(&self, token: &str, line_number: usize) -> TurtleResult<Object> {
    // All syntax errors raised here differ only in message and column.
    let syntax_error = |message: String, column: usize| {
        TurtleParseError::syntax(TurtleSyntaxError::Generic {
            message,
            position: TextPosition::new(line_number, column, 0),
        })
    };

    if !token.starts_with('"') {
        return Err(syntax_error("Literal must start with quote".to_string(), 1));
    }

    // Byte offset of the first unescaped quote after the opening one.
    let mut skip_next = false;
    let closing_quote = token
        .char_indices()
        .skip(1)
        .find_map(|(offset, ch)| {
            if skip_next {
                skip_next = false;
                return None;
            }
            match ch {
                '\\' => {
                    skip_next = true;
                    None
                }
                '"' => Some(offset),
                _ => None,
            }
        })
        .ok_or_else(|| syntax_error("Unterminated string literal".to_string(), token.len()))?;

    let suffix = &token[closing_quote + 1..];
    let lexical = self.unescape_string(&token[1..closing_quote])?;

    let literal = if suffix.is_empty() {
        Literal::new_simple_literal(&lexical)
    } else if let Some(language) = suffix.strip_prefix('@') {
        Literal::new_language_tagged_literal(&lexical, language)
            .map_err(|e| TurtleParseError::model(e.into()))?
    } else if let Some(datatype_iri) = suffix
        .strip_prefix("^^<")
        .and_then(|rest| rest.strip_suffix('>'))
    {
        let datatype = NamedNode::new(datatype_iri).map_err(TurtleParseError::model)?;
        Literal::new_typed_literal(&lexical, datatype)
    } else {
        return Err(syntax_error(
            format!("Invalid literal suffix: {suffix}"),
            closing_quote + 1,
        ));
    };
    Ok(Object::Literal(literal))
}
/// Decodes the backslash escapes of an N-Triples string literal body.
///
/// Supported escapes are the single-character escapes `\t`, `\b`, `\n`,
/// `\r`, `\f`, `\"`, `\'` and `\\`, plus the numeric escapes `\uXXXX`
/// (four hex digits) and `\UXXXXXXXX` (eight hex digits). All other
/// characters are copied through unchanged.
///
/// # Errors
///
/// * `InvalidEscape` for an unknown escape character, or for a numeric escape
///   that is too short or contains a non-hex-digit character.
/// * `InvalidUnicode` when a numeric escape does not denote a Unicode scalar
///   value (for example a surrogate).
/// * `UnexpectedEof` when the input ends right after a backslash.
///
/// Error positions are always `TextPosition::start()` because the location of
/// `s` within the source line is not known here.
fn unescape_string(&self, s: &str) -> TurtleResult<String> {
    /// Consumes exactly `digits` hex digits after a `\u` / `\U` marker and
    /// converts them to the character they denote.
    fn read_code_point(
        chars: &mut std::str::Chars<'_>,
        marker: char,
        digits: usize,
    ) -> TurtleResult<char> {
        let invalid_escape = |code: &str| {
            TurtleParseError::syntax(TurtleSyntaxError::InvalidEscape {
                sequence: format!("{marker}{code}"),
                position: TextPosition::start(),
            })
        };

        let mut code = String::with_capacity(digits);
        for _ in 0..digits {
            match chars.next() {
                Some(hex_char) => code.push(hex_char),
                None => return Err(invalid_escape(&code)),
            }
        }

        // `from_str_radix` tolerates a leading '+', which is not a hex digit
        // and must not be accepted inside an escape sequence.
        if !code.bytes().all(|b| b.is_ascii_hexdigit()) {
            return Err(invalid_escape(&code));
        }
        let code_point = u32::from_str_radix(&code, 16).map_err(|_| invalid_escape(&code))?;

        char::from_u32(code_point).ok_or_else(|| {
            TurtleParseError::syntax(TurtleSyntaxError::InvalidUnicode {
                codepoint: code_point,
                position: TextPosition::start(),
            })
        })
    }

    let mut result = String::with_capacity(s.len());
    let mut chars = s.chars();
    // `while let` rather than `for`: the escape branches advance `chars` too.
    while let Some(ch) = chars.next() {
        if ch != '\\' {
            result.push(ch);
            continue;
        }
        let decoded = match chars.next() {
            Some('t') => '\t',
            Some('b') => '\u{0008}',
            Some('n') => '\n',
            Some('r') => '\r',
            Some('f') => '\u{000C}',
            Some('"') => '"',
            Some('\'') => '\'',
            Some('\\') => '\\',
            Some('u') => read_code_point(&mut chars, 'u', 4)?,
            Some('U') => read_code_point(&mut chars, 'U', 8)?,
            Some(other) => {
                return Err(TurtleParseError::syntax(TurtleSyntaxError::InvalidEscape {
                    sequence: other.to_string(),
                    position: TextPosition::start(),
                }));
            }
            None => {
                return Err(TurtleParseError::syntax(TurtleSyntaxError::UnexpectedEof {
                    position: TextPosition::start(),
                }));
            }
        };
        result.push(decoded);
    }
    Ok(result)
}
}
impl Parser<Triple> for NTriplesParser {
    /// Reads `reader` line by line and collects every triple found.
    ///
    /// Line numbers passed to `parse_line` are 1-based. An I/O error always
    /// aborts parsing. A malformed line aborts parsing in strict mode; in
    /// lenient mode it is remembered, and once the input is exhausted the
    /// number of such lines is printed to stderr.
    fn parse<R: Read>(&self, reader: R) -> TurtleResult<Vec<Triple>> {
        let mut parsed = Vec::new();
        let mut skipped = Vec::new();

        for (index, raw_line) in BufReader::new(reader).lines().enumerate() {
            let text = raw_line.map_err(TurtleParseError::io)?;
            match self.parse_line(&text, index + 1) {
                // `None` (blank or comment line) contributes nothing.
                Ok(maybe_triple) => parsed.extend(maybe_triple),
                Err(err) if self.lenient => skipped.push(err),
                Err(err) => return Err(err),
            }
        }

        if self.lenient && !skipped.is_empty() {
            eprintln!(
                "Warning: {} errors encountered during parsing",
                skipped.len()
            );
        }
        Ok(parsed)
    }

    /// Returns a boxed iterator that parses `reader` lazily, one line per
    /// step, using a clone of this parser.
    fn for_reader<R: BufRead + 'static>(
        &self,
        reader: R,
    ) -> Box<dyn Iterator<Item = TurtleResult<Triple>>> {
        let iterator = NTriplesIterator {
            lines: reader.lines().enumerate(),
            parser: self.clone(),
        };
        Box::new(iterator)
    }
}
/// Streaming iterator that yields one parsed triple at a time.
///
/// Built by `NTriplesParser::for_reader`; `L` is the underlying enumerated
/// line iterator.
pub struct NTriplesIterator<L> {
/// Source of `(zero-based line index, line read result)` pairs.
lines: L,
/// Parser applied to every line; its `lenient` flag decides whether
/// malformed lines are skipped or reported.
parser: NTriplesParser,
}
impl<L> Iterator for NTriplesIterator<L>
where
    L: Iterator<Item = (usize, std::io::Result<String>)>,
{
    type Item = TurtleResult<Triple>;

    /// Advances to the next triple.
    ///
    /// Blank and comment lines are skipped. A malformed line is skipped when
    /// the parser is lenient and yielded as an error otherwise. I/O errors
    /// from the underlying line source are always yielded. Returns `None`
    /// once the line source is exhausted.
    fn next(&mut self) -> Option<Self::Item> {
        for (index, read_result) in self.lines.by_ref() {
            let text = match read_result {
                Ok(text) => text,
                Err(io_err) => return Some(Err(TurtleParseError::io(io_err))),
            };
            match self.parser.parse_line(&text, index + 1) {
                Ok(Some(triple)) => return Some(Ok(triple)),
                Ok(None) => {}
                Err(_) if self.parser.lenient => {}
                Err(err) => return Some(Err(err)),
            }
        }
        None
    }
}
/// Serializer that writes triples in N-Triples syntax, one statement per line.
#[derive(Debug, Clone)]
pub struct NTriplesSerializer {
/// Configuration cloned into every `FormattedWriter` this serializer
/// creates; `new` sets `pretty` and `use_prefixes` to `false`.
config: SerializationConfig,
}
impl Default for NTriplesSerializer {
fn default() -> Self {
Self::new()
}
}
impl NTriplesSerializer {
/// Creates a serializer whose configuration has pretty-printing and prefix
/// usage switched off; all other settings keep their defaults.
pub fn new() -> Self {
    let config = SerializationConfig {
        pretty: false,
        use_prefixes: false,
        ..SerializationConfig::default()
    };
    Self { config }
}
/// Writes the textual form of `subject` to `writer`.
///
/// Named nodes are written as `<iri>`, blank nodes as `_:label` and
/// variables as `?name`.
///
/// # Errors
///
/// Returns a syntax error for quoted-triple subjects (nothing is written in
/// that case) and an I/O error when the write fails.
fn serialize_subject<W: Write>(
    &self,
    subject: &Subject,
    writer: &mut FormattedWriter<W>,
) -> TurtleResult<()> {
    let rendered = match subject {
        Subject::NamedNode(nn) => format!("<{}>", nn.as_str()),
        Subject::BlankNode(bn) => format!("_:{}", bn.as_str()),
        Subject::Variable(var) => format!("?{}", var.as_str()),
        Subject::QuotedTriple(_) => {
            return Err(TurtleParseError::syntax(TurtleSyntaxError::Generic {
                message: "QuotedTriple subjects not supported in N-Triples".to_string(),
                position: TextPosition::default(),
            }));
        }
    };
    writer.write_str(&rendered).map_err(TurtleParseError::io)?;
    Ok(())
}
/// Writes the textual form of `predicate` to `writer`.
///
/// Named nodes are written as `<iri>` and variables as `?name`.
///
/// # Errors
///
/// Returns an I/O error when the write fails.
fn serialize_predicate<W: Write>(
    &self,
    predicate: &Predicate,
    writer: &mut FormattedWriter<W>,
) -> TurtleResult<()> {
    let rendered = match predicate {
        Predicate::NamedNode(nn) => format!("<{}>", nn.as_str()),
        Predicate::Variable(var) => format!("?{}", var.as_str()),
    };
    writer.write_str(&rendered).map_err(TurtleParseError::io)?;
    Ok(())
}
/// Writes the textual form of `object` to `writer`.
///
/// Named nodes are written as `<iri>`, blank nodes as `_:label` and
/// variables as `?name`. For a literal, the result of
/// `writer.escape_string` on its value is written first, followed by
/// `@lang` when a language tag is present, or by `^^<datatype>` when the
/// datatype is anything other than `xsd:string`.
///
/// # Errors
///
/// Returns a syntax error for quoted-triple objects (nothing is written in
/// that case) and an I/O error when a write fails.
fn serialize_object<W: Write>(
    &self,
    object: &Object,
    writer: &mut FormattedWriter<W>,
) -> TurtleResult<()> {
    const XSD_STRING: &str = "http://www.w3.org/2001/XMLSchema#string";

    // Every arm yields the last fragment that still has to be written, if any.
    let trailing: Option<String> = match object {
        Object::NamedNode(nn) => Some(format!("<{}>", nn.as_str())),
        Object::BlankNode(bn) => Some(format!("_:{}", bn.as_str())),
        Object::Variable(var) => Some(format!("?{}", var.as_str())),
        Object::QuotedTriple(_) => {
            return Err(TurtleParseError::syntax(TurtleSyntaxError::Generic {
                message: "QuotedTriple objects not supported in N-Triples".to_string(),
                position: TextPosition::default(),
            }));
        }
        Object::Literal(literal) => {
            let lexical = writer.escape_string(literal.value());
            writer.write_str(&lexical).map_err(TurtleParseError::io)?;
            match literal.language() {
                Some(language) => Some(format!("@{language}")),
                None => {
                    let datatype = literal.datatype();
                    if datatype.as_str() == XSD_STRING {
                        None
                    } else {
                        Some(format!("^^<{}>", datatype.as_str()))
                    }
                }
            }
        }
    };

    if let Some(fragment) = trailing {
        writer.write_str(&fragment).map_err(TurtleParseError::io)?;
    }
    Ok(())
}
}
impl Serializer<Triple> for NTriplesSerializer {
    /// Writes every triple in `triples` to `writer`, each followed by ` .`
    /// and a newline.
    ///
    /// A single `FormattedWriter` is created for the whole batch and handed
    /// straight to `serialize_item_formatted`, rather than being wrapped in a
    /// fresh `FormattedWriter` (with a cloned configuration) for every triple.
    ///
    /// # Errors
    ///
    /// Stops at, and returns, the first serialization or I/O error.
    fn serialize<W: Write>(&self, triples: &[Triple], writer: W) -> TurtleResult<()> {
        let mut formatted_writer = FormattedWriter::new(writer, self.config.clone());
        for triple in triples {
            self.serialize_item_formatted(triple, &mut formatted_writer)?;
            formatted_writer
                .write_str(" .\n")
                .map_err(TurtleParseError::io)?;
        }
        Ok(())
    }

    /// Writes `subject predicate object` for a single triple to `writer`,
    /// without the terminating ` .` or newline.
    fn serialize_item<W: Write>(&self, triple: &Triple, writer: W) -> TurtleResult<()> {
        let mut formatted_writer = FormattedWriter::new(writer, self.config.clone());
        self.serialize_item_formatted(triple, &mut formatted_writer)
    }
}
impl NTriplesSerializer {
    /// Writes the three terms of `triple` to `writer`, separated by single
    /// spaces. No statement terminator is written.
    ///
    /// # Errors
    ///
    /// Returns the first error reported while serializing a term or writing
    /// a separator.
    fn serialize_item_formatted<W: Write>(
        &self,
        triple: &Triple,
        writer: &mut FormattedWriter<W>,
    ) -> TurtleResult<()> {
        let separator = " ";
        self.serialize_subject(triple.subject(), writer)?;
        writer.write_str(separator).map_err(TurtleParseError::io)?;
        self.serialize_predicate(triple.predicate(), writer)?;
        writer.write_str(separator).map_err(TurtleParseError::io)?;
        self.serialize_object(triple.object(), writer)
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Cursor;

    /// A single well-formed line yields a triple with the expected IRI subject.
    #[test]
    fn test_parse_simple_triple() {
        let input = r#"<http://example.org/subject> <http://example.org/predicate> "object" ."#;
        let parsed = NTriplesParser::new()
            .parse_line(input, 1)
            .expect("parsing should succeed")
            .expect("parsing should succeed");
        match parsed.subject() {
            Subject::NamedNode(subject) => {
                assert_eq!(subject.as_str(), "http://example.org/subject");
            }
            _ => panic!("Expected named node subject"),
        }
    }

    /// Two statements on separate lines produce two triples.
    #[test]
    fn test_parse_multiple_triples() {
        let input = r#"<http://example.org/s1> <http://example.org/p1> "o1" .
<http://example.org/s2> <http://example.org/p2> "o2" .
"#;
        let reader = Cursor::new(input);
        let triples = NTriplesParser::new()
            .parse(reader)
            .expect("parsing should succeed");
        let count = triples.len();
        assert_eq!(count, 2);
    }

    /// Serializing a simple triple emits the bracketed subject IRI and the
    /// quoted literal.
    #[test]
    fn test_serialize_triple() {
        let subject_node = NamedNode::new("http://example.org/subject").expect("valid IRI");
        let predicate_node = NamedNode::new("http://example.org/predicate").expect("valid IRI");
        let triple = Triple::new(
            Subject::NamedNode(subject_node),
            Predicate::NamedNode(predicate_node),
            Object::Literal(Literal::new_simple_literal("object")),
        );

        let mut buffer = Vec::new();
        NTriplesSerializer::new()
            .serialize(&[triple], &mut buffer)
            .expect("serialization should succeed");

        let rendered = String::from_utf8(buffer).expect("valid UTF-8");
        for expected in ["<http://example.org/subject>", "\"object\""] {
            assert!(rendered.contains(expected));
        }
    }
}