#![allow(dead_code)]
use super::error::{ParseResult, RdfParseError, RdfSyntaxError, TextPosition};
use super::n3_lexer::N3Token;
use super::toolkit::{Parser, RuleRecognizer};
use crate::model::{BlankNode, Literal, NamedNode, Object, Predicate, Subject, Triple};
use std::collections::HashMap;
/// Kinds of items this module models for a Turtle document.
#[derive(Debug, Clone, PartialEq)]
pub enum TurtleNode {
    /// An RDF triple.
    Triple(Triple),
    /// A prefix declaration: the prefix label and the IRI it is bound to.
    PrefixDeclaration { prefix: String, iri: String },
    /// A base declaration carrying the base IRI.
    BaseDeclaration { iri: String },
    /// A comment, carrying its text.
    Comment(String),
}
/// State used while resolving IRIs, prefixed names and generated blank nodes.
#[derive(Debug, Clone)]
pub struct TurtleContext {
    /// Base IRI used to resolve relative IRIs; with `None`, resolving a
    /// relative IRI is reported as a syntax error.
    pub base_iri: Option<String>,
    /// Prefix label to namespace IRI. A prefixed name without a prefix is
    /// looked up under the empty-string key.
    pub prefixes: HashMap<String, String>,
    /// Counter used to label generated blank nodes; it is incremented before
    /// each label is built.
    pub blank_node_counter: u64,
    /// Position attached to the syntax errors created through this context.
    pub position: TextPosition,
}
impl Default for TurtleContext {
fn default() -> Self {
let mut prefixes = HashMap::new();
prefixes.insert(
"rdf".to_string(),
"http://www.w3.org/1999/02/22-rdf-syntax-ns#".to_string(),
);
prefixes.insert(
"rdfs".to_string(),
"http://www.w3.org/2000/01/rdf-schema#".to_string(),
);
prefixes.insert(
"xsd".to_string(),
"http://www.w3.org/2001/XMLSchema#".to_string(),
);
prefixes.insert(
"owl".to_string(),
"http://www.w3.org/2002/07/owl#".to_string(),
);
Self {
base_iri: None,
prefixes,
blank_node_counter: 0,
position: TextPosition::start(),
}
}
}
impl TurtleContext {
pub fn new() -> Self {
Self::default()
}
pub fn resolve_prefixed_name(&self, prefix: Option<&str>, local: &str) -> ParseResult<String> {
match prefix {
Some(prefix) => match self.prefixes.get(prefix) {
Some(base_iri) => Ok(format!("{base_iri}{local}")),
None => Err(RdfParseError::Syntax(RdfSyntaxError::with_position(
format!("Undefined prefix: {prefix}"),
self.position,
))),
},
None => {
match self.prefixes.get("") {
Some(base_iri) => Ok(format!("{base_iri}{local}")),
None => Err(RdfParseError::Syntax(RdfSyntaxError::with_position(
"No default prefix defined".to_string(),
self.position,
))),
}
}
}
}
pub fn resolve_iri(&self, iri: &str) -> ParseResult<String> {
if self.is_absolute_iri(iri) {
Ok(iri.to_string())
} else {
match &self.base_iri {
Some(base) => Ok(self.resolve_relative_iri(base, iri)),
None => Err(RdfParseError::Syntax(RdfSyntaxError::with_position(
format!("Relative IRI without base: {iri}"),
self.position,
))),
}
}
}
pub fn generate_blank_node(&mut self) -> BlankNode {
self.blank_node_counter += 1;
BlankNode::new(format!("_:gen{}", self.blank_node_counter))
.expect("generated blank node format is always valid")
}
fn is_absolute_iri(&self, iri: &str) -> bool {
iri.contains(':') && !iri.starts_with(':')
}
fn resolve_relative_iri(&self, base: &str, relative: &str) -> String {
if relative.is_empty() {
return base.to_string();
}
if base.ends_with('/') || base.ends_with('#') {
format!("{base}{relative}")
} else {
format!("{base}/{relative}")
}
}
}
/// States of the Turtle grammar recognizer.
///
/// NOTE(review): this file only ever constructs `ExpectingStatement`; the
/// descriptions of the other variants are inferred from their names and
/// payloads and should be confirmed once the state machine is implemented.
#[derive(Debug, Clone, PartialEq)]
pub enum TurtleGrammarState {
    /// Initial state of a new recognizer.
    ExpectingStatement,
    /// Presumably inside a prefix declaration; `prefix` holds the label once known.
    PrefixDeclaration { prefix: Option<String> },
    /// Presumably inside a base declaration.
    BaseDeclaration,
    /// Carries the subject of the triple being read.
    TripleWithSubject { subject: Subject },
    /// Carries a subject together with the predicate/object groups collected for it.
    PredicateObjectList {
        subject: Subject,
        predicates: Vec<(Predicate, Vec<Object>)>,
    },
    /// Carries a subject, a predicate and the objects collected for that pair.
    ObjectList {
        subject: Subject,
        predicate: Predicate,
        objects: Vec<Object>,
    },
    /// Carries the predicate/object groups collected for a blank node property list.
    BlankNodePropertyList {
        properties: Vec<(Predicate, Vec<Object>)>,
    },
    /// Carries the items collected for a collection.
    Collection { items: Vec<Object> },
    /// Presumably entered after a syntax error to skip to a safe point.
    ErrorRecovery,
}
/// Recognizer for the Turtle grammar, holding the current grammar state.
#[derive(Debug, Clone)]
pub struct TurtleGrammarRecognizer {
    /// Current state; a new recognizer starts in `ExpectingStatement`.
    state: TurtleGrammarState,
}
impl Default for TurtleGrammarRecognizer {
fn default() -> Self {
Self {
state: TurtleGrammarState::ExpectingStatement,
}
}
}
impl TurtleGrammarRecognizer {
pub fn new() -> Self {
Self::default()
}
fn parse_term(&self, token: &N3Token, context: &mut TurtleContext) -> ParseResult<Object> {
match token {
N3Token::Iri(iri) => {
let resolved_iri = context.resolve_iri(iri)?;
Ok(Object::NamedNode(
NamedNode::new(resolved_iri)
.map_err(|e| RdfParseError::internal(e.to_string()))?,
))
}
N3Token::PrefixedName { prefix, local } => {
let iri = context.resolve_prefixed_name(prefix.as_deref(), local)?;
Ok(Object::NamedNode(
NamedNode::new(iri).map_err(|e| RdfParseError::internal(e.to_string()))?,
))
}
N3Token::BlankNode(label) => Ok(Object::BlankNode(
BlankNode::new(label.clone())
.map_err(|e| RdfParseError::internal(e.to_string()))?,
)),
N3Token::Literal {
value,
datatype,
language,
} => {
let literal: Literal = match (datatype, language) {
(Some(dt), None) => {
let dt_iri = context.resolve_iri(dt)?;
Literal::new_typed_literal(
value,
NamedNode::new(dt_iri)
.map_err(|e| RdfParseError::internal(e.to_string()))?,
)
}
(None, Some(lang)) => Literal::new_language_tagged_literal(value, lang)
.map_err(|e| RdfParseError::InvalidLanguageTag(e.to_string()))?,
(None, None) => Literal::new_simple_literal(value),
(Some(_), Some(_)) => {
return Err(RdfParseError::Syntax(RdfSyntaxError::with_position(
"Literal cannot have both datatype and language tag".to_string(),
context.position,
)));
}
};
Ok(Object::Literal(literal))
}
N3Token::Integer(i) => {
let xsd_integer = NamedNode::new("http://www.w3.org/2001/XMLSchema#integer")
.map_err(|e| RdfParseError::internal(e.to_string()))?;
Ok(Object::Literal(Literal::new_typed_literal(
i.to_string(),
xsd_integer,
)))
}
N3Token::Decimal(d) => {
let xsd_decimal = NamedNode::new("http://www.w3.org/2001/XMLSchema#decimal")
.map_err(|e| RdfParseError::internal(e.to_string()))?;
Ok(Object::Literal(Literal::new_typed_literal(
d.to_string(),
xsd_decimal,
)))
}
N3Token::Double(d) => {
let xsd_double = NamedNode::new("http://www.w3.org/2001/XMLSchema#double")
.map_err(|e| RdfParseError::internal(e.to_string()))?;
Ok(Object::Literal(Literal::new_typed_literal(
d.to_string(),
xsd_double,
)))
}
N3Token::True => {
let xsd_boolean = NamedNode::new("http://www.w3.org/2001/XMLSchema#boolean")
.map_err(|e| RdfParseError::internal(e.to_string()))?;
Ok(Object::Literal(Literal::new_typed_literal(
"true",
xsd_boolean,
)))
}
N3Token::False => {
let xsd_boolean = NamedNode::new("http://www.w3.org/2001/XMLSchema#boolean")
.map_err(|e| RdfParseError::internal(e.to_string()))?;
Ok(Object::Literal(Literal::new_typed_literal(
"false",
xsd_boolean,
)))
}
N3Token::A => {
let rdf_type = NamedNode::new("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")
.map_err(|e| RdfParseError::internal(e.to_string()))?;
Ok(Object::NamedNode(rdf_type))
}
_ => Err(RdfParseError::Syntax(RdfSyntaxError::with_position(
format!("Unexpected token in term position: {token:?}"),
context.position,
))),
}
}
fn parse_subject(&self, token: &N3Token, context: &mut TurtleContext) -> ParseResult<Subject> {
match self.parse_term(token, context)? {
Object::NamedNode(n) => Ok(Subject::NamedNode(n)),
Object::BlankNode(b) => Ok(Subject::BlankNode(b)),
_ => Err(RdfParseError::Syntax(RdfSyntaxError::with_position(
"Invalid subject: must be IRI or blank node".to_string(),
context.position,
))),
}
}
fn parse_predicate(
&self,
token: &N3Token,
context: &mut TurtleContext,
) -> ParseResult<Predicate> {
match self.parse_term(token, context)? {
Object::NamedNode(n) => Ok(Predicate::NamedNode(n)),
_ => Err(RdfParseError::Syntax(RdfSyntaxError::with_position(
"Invalid predicate: must be IRI".to_string(),
context.position,
))),
}
}
}
// Connects the Turtle recognizer to the generic `RuleRecognizer` interface.
impl RuleRecognizer<TurtleNode> for TurtleGrammarRecognizer {
    /// Placeholder implementation: the parser argument is unused and the
    /// method always returns `Ok(None)`, so no node is ever produced.
    fn recognize_next_node<Token>(
        &mut self,
        _parser: &mut Parser<Token>,
    ) -> ParseResult<Option<TurtleNode>> {
        Ok(None)
    }
}
/// Turtle parser front end that owns the [`TurtleContext`] (base IRI and
/// prefix table) used while parsing.
///
/// Derives `Debug` and `Clone` like the other public types in this module.
#[derive(Debug, Clone)]
pub struct TurtleParser {
    /// Resolution state configured through `set_base_iri` and `add_prefix`.
    context: TurtleContext,
}
impl TurtleParser {
    /// Creates a parser backed by a fresh `TurtleContext`.
    pub fn new() -> Self {
        let context = TurtleContext::new();
        Self { context }
    }

    /// Parses Turtle text into triples.
    ///
    /// NOTE(review): currently a stub. The input is ignored and an empty
    /// vector is always returned.
    pub fn parse_str(&mut self, _input: &str) -> ParseResult<Vec<Triple>> {
        Ok(Vec::new())
    }

    /// Parses Turtle read from `_reader` into triples.
    ///
    /// NOTE(review): currently a stub. The reader is never read and an empty
    /// vector is always returned.
    pub fn parse_reader<R: std::io::Read>(&mut self, _reader: R) -> ParseResult<Vec<Triple>> {
        let triples: Vec<Triple> = Vec::new();
        Ok(triples)
    }

    /// Sets the base IRI used for resolving relative IRIs, replacing any
    /// previous value.
    pub fn set_base_iri(&mut self, base_iri: String) {
        let context = &mut self.context;
        context.base_iri = Some(base_iri);
    }

    /// Registers `prefix` for the namespace `iri`, overwriting an existing
    /// binding for the same prefix.
    pub fn add_prefix(&mut self, prefix: String, iri: String) {
        let _previous = self.context.prefixes.insert(prefix, iri);
    }

    /// Returns the parser's resolution context.
    pub fn context(&self) -> &TurtleContext {
        &self.context
    }
}
impl Default for TurtleParser {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A registered prefix expands to its namespace plus the local name; an
    /// unregistered prefix is an error.
    #[test]
    fn test_turtle_context_prefix_resolution() {
        let ctx = TurtleContext::new();

        let expanded = ctx
            .resolve_prefixed_name(Some("rdf"), "type")
            .expect("prefix resolution should succeed");
        assert_eq!(expanded, "http://www.w3.org/1999/02/22-rdf-syntax-ns#type");

        let unknown = ctx.resolve_prefixed_name(Some("undefined"), "test");
        assert!(unknown.is_err());
    }

    /// Absolute IRIs pass through, relative IRIs are resolved against the
    /// base, and relative IRIs without a base are rejected.
    #[test]
    fn test_turtle_context_iri_resolution() {
        let mut ctx = TurtleContext::new();
        ctx.base_iri = Some("http://example.org/".to_string());

        let absolute = ctx
            .resolve_iri("http://other.org/test")
            .expect("operation should succeed");
        assert_eq!(absolute, "http://other.org/test");

        let against_base = ctx
            .resolve_iri("relative")
            .expect("operation should succeed");
        assert_eq!(against_base, "http://example.org/relative");

        ctx.base_iri = None;
        let without_base = ctx.resolve_iri("relative");
        assert!(without_base.is_err());
    }

    /// Consecutive generated blank nodes differ and share the `_:gen` label
    /// prefix.
    #[test]
    fn test_blank_node_generation() {
        let mut ctx = TurtleContext::new();
        let first = ctx.generate_blank_node();
        let second = ctx.generate_blank_node();

        assert_ne!(first, second);
        for node in [&first, &second] {
            assert!(node.to_string().starts_with("_:gen"));
        }
    }

    /// A new parser's context already knows the built-in prefixes.
    #[test]
    fn test_turtle_parser_creation() {
        let parser = TurtleParser::new();
        let prefixes = &parser.context.prefixes;
        assert!(prefixes.contains_key("rdf"));
        assert!(prefixes.contains_key("xsd"));
    }
}