pub mod expander;
pub mod tests;
pub mod tokenizer;
pub use expander::{annotations_to_turtle, expand_annotations, to_explicit_turtle};
pub use tokenizer::{find_annotation_blocks, tokenize_annotation_block, AnnotationToken};
use serde::{Deserialize, Serialize};
use thiserror::Error;
/// Errors reported while parsing a triple and its `{| ... |}` annotation block.
#[derive(Debug, Clone, Error)]
pub enum AnnotationSyntaxError {
/// The parser found something other than what it needed at this position;
/// `expected` describes what was wanted and `got` echoes what was found.
#[error("unexpected token: expected {expected}, got {got}")]
UnexpectedToken { expected: String, got: String },
/// The `{|` opener or the `|}` terminator of the annotation block was not found.
// `}}` in the format string is the escape for a single literal `}`.
#[error("unclosed annotation block: missing '|}}' terminator")]
UnclosedAnnotation,
/// The subject of a triple is malformed (for example a `<<` quoted-triple
/// subject with no closing `>>`); the payload carries the offending text.
#[error("invalid subject: {0}")]
InvalidSubject(String),
/// A predicate position held a term that cannot act as a predicate; the
/// payload explains why.
#[error("invalid predicate: {0}")]
InvalidPredicate(String),
/// The annotation block yielded no predicate/object pairs.
#[error("empty annotation block")]
EmptyAnnotation,
}
/// An RDF literal: a lexical value with an optional datatype or language tag.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct AnnotationLiteral {
/// Lexical form of the literal; it is escaped when rendered as Turtle.
pub value: String,
/// Datatype identifier, rendered as `^^<datatype>` when no language tag is set.
pub datatype: Option<String>,
/// Language tag, rendered as `@language`; takes precedence over `datatype`
/// when both are present.
pub language: Option<String>,
}
/// A term that can appear in an RDF-star triple.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum StarTerm {
/// An IRI, stored without the surrounding `<` `>`.
NamedNode(String),
/// A blank node label, stored without the leading `_:`.
BlankNode(String),
/// A literal value with optional datatype or language tag.
Literal(AnnotationLiteral),
/// A quoted (embedded) triple, rendered as `<< s p o >>`; boxed because the
/// type is recursive.
QuotedTriple(Box<RdfStarTriple>),
}
impl StarTerm {
pub fn as_named_node(&self) -> Option<&str> {
if let Self::NamedNode(iri) = self {
Some(iri.as_str())
} else {
None
}
}
pub fn as_blank_node(&self) -> Option<&str> {
if let Self::BlankNode(id) = self {
Some(id.as_str())
} else {
None
}
}
pub fn to_turtle(&self) -> String {
match self {
Self::NamedNode(iri) => format!("<{}>", iri),
Self::BlankNode(id) => format!("_:{}", id),
Self::Literal(lit) => {
if let Some(lang) = &lit.language {
format!("\"{}\"@{}", escape_turtle_string(&lit.value), lang)
} else if let Some(dt) = &lit.datatype {
format!("\"{}\"^^<{}>", escape_turtle_string(&lit.value), dt)
} else {
format!("\"{}\"", escape_turtle_string(&lit.value))
}
}
Self::QuotedTriple(triple) => {
format!(
"<< {} {} {} >>",
triple.subject.to_turtle(),
triple.predicate.to_turtle(),
triple.object.to_turtle()
)
}
}
}
}
/// A subject/predicate/object triple whose positions hold [`StarTerm`]s.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct RdfStarTriple {
/// Subject term.
pub subject: StarTerm,
/// Predicate term.
pub predicate: StarTerm,
/// Object term.
pub object: StarTerm,
}
impl RdfStarTriple {
pub fn new(subject: StarTerm, predicate: StarTerm, object: StarTerm) -> Self {
Self {
subject,
predicate,
object,
}
}
pub fn to_turtle(&self) -> String {
format!(
"{} {} {} .",
self.subject.to_turtle(),
self.predicate.to_turtle(),
self.object.to_turtle()
)
}
pub fn to_quoted(&self) -> String {
format!(
"<< {} {} {} >>",
self.subject.to_turtle(),
self.predicate.to_turtle(),
self.object.to_turtle()
)
}
}
/// One predicate/object entry taken from an annotation block.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct AnnotationPair {
/// Predicate text as carried by the named-node token it was parsed from.
pub predicate: String,
/// Value attached to the predicate.
pub object: AnnotationValue,
}
/// Object of an annotation pair; unlike [`StarTerm`] it has no quoted-triple
/// variant.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum AnnotationValue {
/// An IRI, rendered as `<iri>`.
NamedNode(String),
/// A blank node label, rendered as `_:label`.
BlankNode(String),
/// A literal value with optional datatype or language tag.
Literal(AnnotationLiteral),
}
impl AnnotationValue {
    /// Renders the value in Turtle syntax.
    ///
    /// Named nodes become `<iri>`, blank nodes `_:label`, and literals a quoted,
    /// escaped string followed by `@lang` or `^^<datatype>` when present.
    pub fn to_turtle(&self) -> String {
        let literal = match self {
            Self::NamedNode(iri) => return format!("<{}>", iri),
            Self::BlankNode(label) => return format!("_:{}", label),
            Self::Literal(literal) => literal,
        };
        let quoted = format!("\"{}\"", escape_turtle_string(&literal.value));
        // A language tag wins over a datatype when both are set.
        match (&literal.language, &literal.datatype) {
            (Some(tag), _) => format!("{}@{}", quoted, tag),
            (None, Some(iri)) => format!("{}^^<{}>", quoted, iri),
            (None, None) => quoted,
        }
    }
}
/// A base triple together with the annotation pairs attached to it.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct AnnotatedTriple {
/// The triple that the annotations describe.
pub base: RdfStarTriple,
/// Predicate/object pairs read from the `{| ... |}` block.
pub annotations: Vec<AnnotationPair>,
}
impl AnnotatedTriple {
pub fn new(base: RdfStarTriple, annotations: Vec<AnnotationPair>) -> Self {
Self { base, annotations }
}
}
/// Parser for a triple followed by a `{| ... |}` annotation block; the type
/// holds no state.
pub struct AnnotationParser;
impl AnnotationParser {
pub fn new() -> Self {
Self
}
/// Parses `subject predicate object {| annotations |}` into an [`AnnotatedTriple`].
///
/// The text before the first `{|` is parsed as the base triple; the text
/// between that opener and the last `|}` is tokenized and parsed as
/// predicate/object pairs. Anything after the last `|}` is ignored.
///
/// # Errors
///
/// Returns [`AnnotationSyntaxError::UnclosedAnnotation`] when `{|` or `|}` is
/// missing, and otherwise forwards the error from parsing the base triple,
/// tokenizing the block, or parsing the pairs.
pub fn parse_annotated_triple(
    &self,
    input: &str,
) -> Result<AnnotatedTriple, AnnotationSyntaxError> {
    const OPEN: &str = "{|";
    const CLOSE: &str = "|}";

    let open_at = match input.find(OPEN) {
        Some(pos) => pos,
        None => return Err(AnnotationSyntaxError::UnclosedAnnotation),
    };
    let triple_text = input[..open_at].trim();
    let after_open = &input[open_at + OPEN.len()..];

    // Use the last terminator so the block extends as far as possible.
    let close_at = match after_open.rfind(CLOSE) {
        Some(pos) => pos,
        None => return Err(AnnotationSyntaxError::UnclosedAnnotation),
    };
    let block_text = after_open[..close_at].trim();

    let base = self.parse_triple(triple_text)?;
    let tokens = tokenize_annotation_block(block_text)?;
    let annotations = self.parse_annotation_pairs(&tokens)?;
    Ok(AnnotatedTriple::new(base, annotations))
}
/// Parses `subject predicate object` text into an [`RdfStarTriple`].
///
/// A subject starting with `<<` is treated as a quoted triple: everything up
/// to the last `>>` is parsed recursively, and the predicate and object are
/// read from what follows. Otherwise the three terms are split off in order.
/// Text after the object is ignored.
///
/// # Errors
///
/// Returns `InvalidSubject` when a `<<` subject has no closing `>>`,
/// `UnexpectedToken` when a term cannot be split off, and forwards errors from
/// the per-term parsers.
fn parse_triple(&self, input: &str) -> Result<RdfStarTriple, AnnotationSyntaxError> {
    // Both branches report a missing predicate/object pair the same way.
    let missing_pair = |got: &str| AnnotationSyntaxError::UnexpectedToken {
        expected: "predicate object".to_string(),
        got: got.to_string(),
    };

    match input.strip_prefix("<<") {
        Some(after_open) => {
            let after_open = after_open.trim_start();
            let close_at = match after_open.rfind(">>") {
                Some(pos) => pos,
                None => return Err(AnnotationSyntaxError::InvalidSubject(input.to_string())),
            };
            let quoted = self.parse_triple(after_open[..close_at].trim())?;
            let tail = after_open[close_at + 2..].trim();
            let (pred_src, obj_src) = split_two_terms(tail).ok_or_else(|| missing_pair(tail))?;
            let predicate = self.parse_predicate_term(pred_src)?;
            let object = self.parse_object_term(obj_src)?;
            Ok(RdfStarTriple::new(
                StarTerm::QuotedTriple(Box::new(quoted)),
                predicate,
                object,
            ))
        }
        None => {
            let (subj_src, tail) = match split_first_term(input) {
                Some(parts) => parts,
                None => {
                    return Err(AnnotationSyntaxError::UnexpectedToken {
                        expected: "subject".to_string(),
                        got: input.to_string(),
                    })
                }
            };
            let (pred_src, obj_src) =
                split_two_terms(tail.trim()).ok_or_else(|| missing_pair(tail))?;
            let subject = self.parse_subject_term(subj_src)?;
            let predicate = self.parse_predicate_term(pred_src)?;
            let object = self.parse_object_term(obj_src)?;
            Ok(RdfStarTriple::new(subject, predicate, object))
        }
    }
}
fn parse_subject_term(&self, s: &str) -> Result<StarTerm, AnnotationSyntaxError> {
self.parse_term(s)
}
/// Parses a term in predicate position.
///
/// # Errors
///
/// Forwards errors from `parse_term`, and returns `InvalidPredicate` for any
/// term that is not a named node: RDF predicates must be IRIs, so blank
/// nodes, literals and quoted triples are all rejected.
fn parse_predicate_term(&self, s: &str) -> Result<StarTerm, AnnotationSyntaxError> {
    let reason = match self.parse_term(s)? {
        term @ StarTerm::NamedNode(_) => return Ok(term),
        StarTerm::BlankNode(_) => "blank node cannot be used as predicate",
        StarTerm::Literal(_) => "literal cannot be used as predicate",
        StarTerm::QuotedTriple(_) => "quoted triple cannot be used as predicate",
    };
    Err(AnnotationSyntaxError::InvalidPredicate(reason.to_string()))
}
/// Parses a term in object position; every term kind that `parse_term`
/// produces is accepted here.
fn parse_object_term(&self, s: &str) -> Result<StarTerm, AnnotationSyntaxError> {
    let object = self.parse_term(s)?;
    Ok(object)
}
/// Parses a single term: `<iri>`, `_:label`, or a `"..."` literal.
///
/// Surrounding whitespace and any trailing `.` characters are removed before
/// the term is classified by its leading characters.
///
/// # Errors
///
/// Returns `UnexpectedToken` when an IRI lacks its closing `>` or when the
/// text starts with none of the recognised prefixes; literal errors are
/// forwarded from `parse_literal_term`.
fn parse_term(&self, s: &str) -> Result<StarTerm, AnnotationSyntaxError> {
    // Drop a statement-terminating dot, then the whitespace that preceded it.
    let token = s.trim().trim_end_matches('.').trim();

    if let Some(body) = token.strip_prefix('<') {
        return match body.strip_suffix('>') {
            Some(iri) => Ok(StarTerm::NamedNode(iri.to_string())),
            None => Err(AnnotationSyntaxError::UnexpectedToken {
                expected: "IRI like <...>".to_string(),
                got: token.to_string(),
            }),
        };
    }

    if let Some(label) = token.strip_prefix("_:") {
        return Ok(StarTerm::BlankNode(label.to_string()));
    }

    if token.starts_with('"') {
        return self.parse_literal_term(token);
    }

    Err(AnnotationSyntaxError::UnexpectedToken {
        expected: "<IRI>, _:blank, or \"literal\"".to_string(),
        got: token.to_string(),
    })
}
/// Parses a double-quoted literal with an optional `@lang` or `^^datatype`
/// suffix into a [`StarTerm::Literal`].
///
/// The stored value is the *unescaped* string: escape sequences such as `\"`,
/// `\\`, `\n` and `\uXXXX` are decoded here, because `to_turtle` escapes the
/// value again when serializing. Keeping the raw text would double-escape it
/// on output.
///
/// A datatype written as `<iri>` is stored without the angle brackets; any
/// other datatype text is stored as written.
///
/// # Errors
///
/// Returns `UnexpectedToken` when `s` does not start with `"` or has no
/// unescaped closing quote.
fn parse_literal_term(&self, s: &str) -> Result<StarTerm, AnnotationSyntaxError> {
    /// Decodes Turtle string escapes; unrecognised or malformed escapes are
    /// kept verbatim.
    fn unescape(raw: &str) -> String {
        let mut out = String::with_capacity(raw.len());
        let mut chars = raw.chars();
        // `while let` rather than `for`: the body consumes extra characters.
        while let Some(c) = chars.next() {
            if c != '\\' {
                out.push(c);
                continue;
            }
            match chars.next() {
                Some('n') => out.push('\n'),
                Some('r') => out.push('\r'),
                Some('t') => out.push('\t'),
                Some('b') => out.push('\u{0008}'),
                Some('f') => out.push('\u{000C}'),
                Some('"') => out.push('"'),
                Some('\'') => out.push('\''),
                Some('\\') => out.push('\\'),
                Some(marker) if marker == 'u' || marker == 'U' => {
                    let width = if marker == 'u' { 4 } else { 8 };
                    let digits: String = chars.clone().take(width).collect();
                    // Check the digits explicitly: `from_str_radix` alone
                    // would also accept a leading sign.
                    let decoded = if digits.len() == width
                        && digits.chars().all(|d| d.is_ascii_hexdigit())
                    {
                        u32::from_str_radix(&digits, 16)
                            .ok()
                            .and_then(char::from_u32)
                    } else {
                        None
                    };
                    match decoded {
                        Some(ch) => {
                            out.push(ch);
                            for _ in 0..width {
                                chars.next();
                            }
                        }
                        None => {
                            out.push('\\');
                            out.push(marker);
                        }
                    }
                }
                Some(other) => {
                    out.push('\\');
                    out.push(other);
                }
                None => out.push('\\'),
            }
        }
        out
    }

    let rest = s
        .strip_prefix('"')
        .ok_or_else(|| AnnotationSyntaxError::UnexpectedToken {
            expected: "string literal".to_string(),
            got: s.to_string(),
        })?;

    // Find the first quote that is not preceded by an escaping backslash.
    let mut close_pos = None;
    let mut escaped = false;
    for (i, c) in rest.char_indices() {
        if escaped {
            escaped = false;
        } else if c == '\\' {
            escaped = true;
        } else if c == '"' {
            close_pos = Some(i);
            break;
        }
    }
    let close_pos = close_pos.ok_or_else(|| AnnotationSyntaxError::UnexpectedToken {
        expected: "closing quote for literal".to_string(),
        got: s.to_string(),
    })?;

    let value = unescape(&rest[..close_pos]);
    let after = rest[close_pos + 1..].trim();

    let (language, datatype) = if let Some(lang_rest) = after.strip_prefix('@') {
        (Some(lang_rest.trim().to_string()), None)
    } else if let Some(dt_rest) = after.strip_prefix("^^") {
        let dt_rest = dt_rest.trim();
        // Strip `<...>` when both brackets are present; otherwise keep the text.
        let datatype = dt_rest
            .strip_prefix('<')
            .and_then(|t| t.strip_suffix('>'))
            .unwrap_or(dt_rest);
        (None, Some(datatype.to_string()))
    } else {
        (None, None)
    };

    Ok(StarTerm::Literal(AnnotationLiteral {
        value,
        datatype,
        language,
    }))
}
/// Turns the tokens of an annotation block into predicate/object pairs.
///
/// The accepted shape follows Turtle's predicate-object list:
/// `pred obj (, obj)* (; pred obj (, obj)*)*`. A `,` repeats the current
/// predicate for the next object, while `;` (and `.`) ends the current
/// predicate. Stray separators before a predicate and a trailing separator at
/// the end of the block are tolerated.
///
/// # Errors
///
/// Returns `EmptyAnnotation` when no pair is produced, `InvalidPredicate` when
/// a predicate position holds anything but a named node, and `UnexpectedToken`
/// when an object is missing or is not a named node, blank node or literal.
fn parse_annotation_pairs(
    &self,
    tokens: &[AnnotationToken],
) -> Result<Vec<AnnotationPair>, AnnotationSyntaxError> {
    if tokens.is_empty() {
        return Err(AnnotationSyntaxError::EmptyAnnotation);
    }

    let mut pairs = Vec::new();
    // Predicate to reuse for the next object; set after a `,`.
    let mut carried_predicate: Option<String> = None;
    let mut i = 0;

    while i < tokens.len() {
        let predicate = match carried_predicate.take() {
            Some(repeated) => repeated,
            None => {
                let fresh = match &tokens[i] {
                    AnnotationToken::NamedNode(iri) => iri.clone(),
                    AnnotationToken::Dot | AnnotationToken::Semicolon | AnnotationToken::Comma => {
                        i += 1;
                        continue;
                    }
                    other => {
                        return Err(AnnotationSyntaxError::InvalidPredicate(format!(
                            "expected named node as predicate, got {:?}",
                            other
                        )));
                    }
                };
                i += 1;
                fresh
            }
        };

        let object = match tokens.get(i) {
            None => {
                return Err(AnnotationSyntaxError::UnexpectedToken {
                    expected: "object after predicate".to_string(),
                    got: "end of tokens".to_string(),
                });
            }
            Some(AnnotationToken::NamedNode(iri)) => AnnotationValue::NamedNode(iri.clone()),
            Some(AnnotationToken::BlankNode(id)) => AnnotationValue::BlankNode(id.clone()),
            Some(AnnotationToken::Literal(value, lang, datatype)) => {
                AnnotationValue::Literal(AnnotationLiteral {
                    value: value.clone(),
                    language: lang.clone(),
                    datatype: datatype.clone(),
                })
            }
            Some(other) => {
                return Err(AnnotationSyntaxError::UnexpectedToken {
                    expected: "object value".to_string(),
                    got: format!("{:?}", other),
                });
            }
        };
        i += 1;

        match tokens.get(i) {
            Some(AnnotationToken::Comma) => {
                i += 1;
                // Only carry the predicate when another token follows, so a
                // trailing comma stays harmless.
                if i < tokens.len() {
                    carried_predicate = Some(predicate.clone());
                }
            }
            Some(AnnotationToken::Semicolon) | Some(AnnotationToken::Dot) => i += 1,
            _ => {}
        }

        pairs.push(AnnotationPair { predicate, object });
    }

    // Reached when the block held nothing but separators.
    if pairs.is_empty() {
        return Err(AnnotationSyntaxError::EmptyAnnotation);
    }
    Ok(pairs)
}
}
impl Default for AnnotationParser {
fn default() -> Self {
Self::new()
}
}
/// Escapes a string for use inside a double-quoted Turtle literal.
///
/// Backslash, double quote, line feed, carriage return and tab are replaced by
/// their backslash escapes; every other character is copied unchanged.
fn escape_turtle_string(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '\\' => escaped.push_str("\\\\"),
            '"' => escaped.push_str("\\\""),
            '\n' => escaped.push_str("\\n"),
            '\r' => escaped.push_str("\\r"),
            '\t' => escaped.push_str("\\t"),
            other => escaped.push(other),
        }
    }
    escaped
}
/// Splits the first term off the front of `input`.
///
/// Returns `(term, remainder)` where `term` is the leading `<iri>`, `_:label`
/// or `"literal"` (including any `@lang`, `^^<iri>` or `^^prefix:name`
/// suffix) and `remainder` is the untrimmed text after it. `input` is trimmed
/// first.
///
/// Returns `None` when `input` is empty, starts with none of the recognised
/// prefixes, or the term is not terminated (missing `>` or closing quote).
fn split_first_term(input: &str) -> Option<(&str, &str)> {
    let input = input.trim();
    if input.is_empty() {
        return None;
    }

    if input.starts_with('<') {
        let end = input.find('>')?;
        return Some(input.split_at(end + 1));
    }

    if let Some(rest) = input.strip_prefix("_:") {
        // The label runs to the next whitespace; the +2 re-adds the `_:` prefix.
        let end = rest.find(char::is_whitespace).unwrap_or(rest.len());
        return Some(input.split_at(end + 2));
    }

    if let Some(rest) = input.strip_prefix('"') {
        // Find the first quote that is not preceded by an escaping backslash.
        let mut escaped = false;
        let mut close = None;
        for (i, c) in rest.char_indices() {
            if escaped {
                escaped = false;
            } else if c == '\\' {
                escaped = true;
            } else if c == '"' {
                close = Some(i);
                break;
            }
        }
        let close = close?;
        let after = &rest[close + 1..];

        // Length in bytes of the `@lang` / `^^datatype` suffix, if any.
        let suffix_len = if let Some(lang) = after.strip_prefix('@') {
            1 + lang.find(char::is_whitespace).unwrap_or(lang.len())
        } else if let Some(datatype) = after.strip_prefix("^^") {
            if let Some(iri) = datatype.strip_prefix('<') {
                // `^^<` + IRI body + `>`
                3 + iri.find('>')? + 1
            } else {
                // Prefixed-name datatype such as `xsd:integer`: it runs to the
                // next whitespace. A dangling `^^` is left in the remainder.
                match datatype.find(char::is_whitespace).unwrap_or(datatype.len()) {
                    0 => 0,
                    name_len => 2 + name_len,
                }
            }
        } else {
            0
        };

        // Opening quote + body + closing quote + suffix.
        let term_end = 1 + close + 1 + suffix_len;
        return Some(input.split_at(term_end));
    }

    None
}
/// Splits the first two terms off `input` and returns them as `(first, second)`.
///
/// Whatever follows the second term is discarded. Returns `None` when either
/// term cannot be split off or nothing follows the first term.
fn split_two_terms(input: &str) -> Option<(&str, &str)> {
    let (head, tail) = split_first_term(input.trim())?;
    let tail = tail.trim();
    if tail.is_empty() {
        return None;
    }
    split_first_term(tail).map(|(next, _)| (head, next))
}