use std::collections::HashMap;
use std::fmt;
use oxirs_core::model::{NamedNode, Object, Predicate, Subject, Triple};
use thiserror::Error;
/// Errors reported while turning source rows into RDF terms and triples.
///
/// Display messages are generated by `thiserror` from the `#[error(...)]`
/// attribute on each variant.
#[derive(Debug, Error)]
pub enum MappingError {
/// A column required by an object specification is not present in the row
/// (raised by `resolve_object_spec` when `Row::get` returns `None`).
#[error("Missing column '{column}' in row {row_index}")]
MissingColumn {
/// Name of the column that was looked up.
column: String,
/// Index of the row being processed, as supplied by the caller.
row_index: usize,
},
/// A `{column}` placeholder in a template names a column the row does not
/// contain (raised by `Template::render`).
#[error("Template '{template}' references unknown column '{column}' in row {row_index}")]
UnresolvableTemplate {
/// The full template pattern being rendered.
template: String,
/// The placeholder name that could not be resolved.
column: String,
/// Index of the row being processed, as supplied by the caller.
row_index: usize,
},
/// A rendered template did not yield a valid IRI.
// NOTE(review): not constructed anywhere in the visible part of this file;
// presumably used for subject IRIs by the mapping engine — confirm.
#[error("Invalid IRI generated from template '{template}': '{iri}'")]
InvalidIri {
/// The template pattern that produced the IRI.
template: String,
/// The rejected IRI text.
iri: String,
},
/// The predicate string of a predicate-object map was rejected by
/// `NamedNode::new` (raised by `build_triple_from_pom`).
#[error("Invalid predicate IRI: '{iri}'")]
InvalidPredicateIri {
/// The rejected predicate IRI text.
iri: String,
},
/// An object IRI was rejected by `NamedNode::new`. `resolve_object_spec`
/// also uses this variant when a `TypedColumn` datatype IRI is invalid.
#[error("Invalid object IRI: '{iri}'")]
InvalidObjectIri {
/// The rejected IRI text.
iri: String,
},
/// JSON input could not be parsed.
#[error("JSON parse error: {message}")]
JsonParseError {
/// Description of the parse failure.
message: String,
},
/// CSV input could not be parsed.
#[error("CSV parse error at line {line}: {message}")]
CsvParseError {
/// Line number reported in the message.
// NOTE(review): whether this is 0- or 1-based is not visible here — confirm.
line: usize,
/// Description of the parse failure.
message: String,
},
/// The configured JSON path did not select an array in the document.
#[error("JSON path '{path}' did not match any array in the document")]
JsonPathNoMatch {
/// The JSON path that failed to match.
path: String,
},
/// The data source yielded no rows at all.
#[error("Data source produced no rows")]
EmptyDataSource,
/// An error from the RDF model layer, carried as its display string
/// (e.g. a failed language-tagged literal construction in
/// `resolve_object_spec`).
#[error("RDF model error: {0}")]
RdfModelError(String),
}
/// Shorthand for a `Result` whose error type is [`MappingError`].
pub type MappingResult<T> = Result<T, MappingError>;
/// One record of source data: a mapping from column name to cell text.
///
/// The `Default` value is an empty row, identical to [`Row::new`].
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct Row {
    /// Cell values keyed by column name.
    pub values: HashMap<String, String>,
}

impl Row {
    /// Creates a row that holds no columns.
    pub fn new() -> Self {
        Row {
            values: HashMap::default(),
        }
    }

    /// Builds a row from `(column, value)` pairs.
    ///
    /// If a column name occurs more than once, the last pair wins.
    pub fn from_pairs(pairs: impl IntoIterator<Item = (String, String)>) -> Self {
        let mut values = HashMap::new();
        values.extend(pairs);
        Row { values }
    }

    /// Returns the value stored under `column`, or `None` if the column is absent.
    pub fn get(&self, column: &str) -> Option<&str> {
        match self.values.get(column) {
            Some(cell) => Some(cell.as_str()),
            None => None,
        }
    }

    /// Reports whether the row has a value for `column`.
    pub fn contains(&self, column: &str) -> bool {
        self.values.get(column).is_some()
    }

    /// Iterates over `(column, value)` pairs in the map's (unspecified) order.
    pub fn iter(&self) -> impl Iterator<Item = (&str, &str)> {
        self.values
            .iter()
            .map(|(name, cell)| (&name[..], &cell[..]))
    }
}

impl fmt::Display for Row {
    /// Writes the row as `{col: value, ...}` with columns sorted by name so
    /// the output is deterministic despite the hash map's iteration order.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let mut sorted: Vec<(&String, &String)> = self.values.iter().collect();
        // Keys are unique, so an unstable sort yields the same order as a stable one.
        sorted.sort_unstable_by(|left, right| left.0.cmp(right.0));

        f.write_str("{")?;
        let mut separator = "";
        for (name, cell) in sorted {
            write!(f, "{separator}{name}: {cell}")?;
            separator = ", ";
        }
        f.write_str("}")
    }
}
/// A string pattern containing `{column}` placeholders that are filled in
/// from a [`Row`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Template {
    /// The raw pattern text, e.g. `http://example.org/person/{id}`.
    pub pattern: String,
}

impl Template {
    /// Wraps `pattern` as a template without validating it.
    pub fn new(pattern: impl Into<String>) -> Self {
        let pattern = pattern.into();
        Template { pattern }
    }

    /// Expands the template against `row`.
    ///
    /// Text outside placeholders is copied verbatim. Each `{name}` is replaced
    /// by the row's value for `name`, passed through `percent_encode_path`.
    /// A `{` with no matching `}` is not a placeholder: it and everything
    /// after it are emitted unchanged.
    ///
    /// # Errors
    ///
    /// Returns [`MappingError::UnresolvableTemplate`] (carrying `row_index`)
    /// when a placeholder names a column that `row` does not contain.
    pub fn render(&self, row: &Row, row_index: usize) -> MappingResult<String> {
        let mut rendered = String::with_capacity(self.pattern.len() + 32);
        let mut remaining = self.pattern.as_str();

        loop {
            // Copy the literal run up to the next placeholder opener.
            let open = match remaining.find('{') {
                Some(at) => at,
                None => {
                    rendered.push_str(remaining);
                    break;
                }
            };
            rendered.push_str(&remaining[..open]);

            // `{` is one byte wide, so `open + 1` is a valid char boundary.
            let after_open = &remaining[open + 1..];
            let close = match after_open.find('}') {
                Some(at) => at,
                None => {
                    // Unterminated placeholder: keep the text as written.
                    rendered.push('{');
                    rendered.push_str(after_open);
                    break;
                }
            };

            let column = &after_open[..close];
            let cell = match row.get(column) {
                Some(cell) => cell,
                None => {
                    return Err(MappingError::UnresolvableTemplate {
                        template: self.pattern.clone(),
                        column: column.to_owned(),
                        row_index,
                    })
                }
            };
            percent_encode_path(cell, &mut rendered);

            remaining = &after_open[close + 1..];
        }

        Ok(rendered)
    }
}

impl fmt::Display for Template {
    /// Writes the raw pattern text.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(self.pattern.as_str())
    }
}
/// Appends `input` to `out`, percent-encoding every byte that is not allowed
/// to appear verbatim.
///
/// Bytes kept as-is: ASCII letters and digits, `-` `_` `.` `~`, and
/// `:` `@` `!` `$` `&` `'` `(` `)` `*` `+` `,` `;` `=`. Every other byte of the
/// UTF-8 encoding (including `/`, `?`, `#`, space, `%` and all non-ASCII
/// bytes) is written as `%` followed by two uppercase hex digits.
pub(crate) fn percent_encode_path(input: &str, out: &mut String) {
    for &octet in input.as_bytes() {
        let unreserved = octet.is_ascii_alphanumeric() || matches!(octet, b'-' | b'_' | b'.' | b'~');
        let allowed_delimiter = matches!(
            octet,
            b':' | b'@'
                | b'!'
                | b'$'
                | b'&'
                | b'\''
                | b'('
                | b')'
                | b'*'
                | b'+'
                | b','
                | b';'
                | b'='
        );

        if unreserved || allowed_delimiter {
            out.push(char::from(octet));
            continue;
        }

        // High nibble first, then low nibble.
        out.push('%');
        out.push(hex_nibble(octet >> 4));
        out.push(hex_nibble(octet & 0x0F));
    }
}
/// Maps a value in `0..=15` to its uppercase hexadecimal digit.
///
/// Values above 15 are not valid nibbles and yield `'0'`.
#[inline]
pub(crate) fn hex_nibble(n: u8) -> char {
    const UPPER_HEX: [char; 16] = [
        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F',
    ];
    UPPER_HEX.get(usize::from(n)).copied().unwrap_or('0')
}
/// Describes how the object term of a triple is produced for a row.
///
/// Each variant is interpreted by `resolve_object_spec`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ObjectSpec {
/// Render the template against the row and use the result as an IRI.
Template(Template),
/// Use the value of the named column as a literal (built with `Literal::new`).
Column(String),
/// Use this fixed string as a literal (built with `Literal::new`), ignoring the row.
Constant(String),
/// Use the value of a column as a typed literal.
TypedColumn {
/// Column supplying the literal's value.
column: String,
/// Datatype IRI; it is validated with `NamedNode::new` when resolved.
datatype: String,
},
/// Use the value of a column as a language-tagged literal whose tag is read
/// from another column of the same row.
LangColumn {
/// Column supplying the literal's value.
column: String,
/// Column supplying the language tag.
lang_column: String,
},
/// Use the value of a column as a language-tagged literal with a fixed tag.
LangFixed {
/// Column supplying the literal's value.
column: String,
/// The language tag applied to every row.
lang: String,
},
/// Use this fixed string as an IRI, ignoring the row.
ConstantIri(String),
}
/// Pairs a predicate IRI with the recipe for producing the matching object.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PredicateObjectMap {
    /// Predicate IRI text; it is validated only when a triple is built.
    pub predicate: String,
    /// How the object term is derived from a row.
    pub object_template: ObjectSpec,
}

impl PredicateObjectMap {
    /// Creates a map from a predicate IRI and an object specification.
    ///
    /// No validation happens here; the predicate is stored as given.
    pub fn new(predicate: impl Into<String>, object_template: ObjectSpec) -> Self {
        let predicate: String = predicate.into();
        PredicateObjectMap {
            predicate,
            object_template,
        }
    }
}
/// The input a mapping rule reads its rows from.
// NOTE(review): the code that turns a `DataSource` into rows is not visible in
// this part of the file; field notes below are inferred from names and from
// the `MappingError` messages — confirm against the parser.
#[derive(Debug, Clone)]
pub enum DataSource {
/// Delimiter-separated input.
Csv {
/// Presumably the raw CSV text itself rather than a path — confirm.
content: String,
/// Field delimiter character.
delimiter: char,
},
/// JSON input.
Json {
/// Presumably the raw JSON text itself rather than a path — confirm.
content: String,
/// Optional path into the document; the `JsonPathNoMatch` message suggests
/// it is expected to select an array — confirm.
json_path: Option<String>,
},
/// Rows supplied directly in memory.
InlineValues {
/// Row data; presumably each inner vector lines up positionally with
/// `headers` — verify against the consumer.
rows: Vec<Vec<String>>,
/// Column names.
headers: Vec<String>,
},
}
/// A named mapping: where rows come from, how each row's subject is formed,
/// and which predicate/object pairs to emit for it.
#[derive(Debug, Clone)]
pub struct MappingRule {
    /// Name identifying this rule.
    pub name: String,
    /// The data the rule reads rows from.
    pub source: DataSource,
    /// Template used to build the subject for each row.
    pub subject_template: Template,
    /// Predicate/object recipes, kept in insertion order.
    pub predicate_object_maps: Vec<PredicateObjectMap>,
    /// Optional graph name; [`MappingRule::new`] leaves it as `None`.
    pub graph_name: Option<String>,
}

impl MappingRule {
    /// Creates a rule with no predicate-object maps and no graph name.
    pub fn new(name: impl Into<String>, source: DataSource, subject_template: Template) -> Self {
        let name: String = name.into();
        MappingRule {
            name,
            source,
            subject_template,
            predicate_object_maps: Vec::new(),
            graph_name: None,
        }
    }

    /// Appends `pom` after any predicate-object maps already on the rule.
    pub fn add_predicate_object_map(&mut self, pom: PredicateObjectMap) {
        let maps = &mut self.predicate_object_maps;
        maps.push(pom);
    }
}
pub fn resolve_object_spec(spec: &ObjectSpec, row: &Row, row_idx: usize) -> MappingResult<Object> {
use oxirs_core::model::Literal;
match spec {
ObjectSpec::Template(tpl) => {
let iri = tpl.render(row, row_idx)?;
let node = NamedNode::new(&iri)
.map_err(|_| MappingError::InvalidObjectIri { iri: iri.clone() })?;
Ok(Object::NamedNode(node))
}
ObjectSpec::Column(col) => {
let value = row.get(col).ok_or_else(|| MappingError::MissingColumn {
column: col.clone(),
row_index: row_idx,
})?;
Ok(Object::Literal(Literal::new(value)))
}
ObjectSpec::Constant(value) => Ok(Object::Literal(Literal::new(value))),
ObjectSpec::TypedColumn { column, datatype } => {
let value = row.get(column).ok_or_else(|| MappingError::MissingColumn {
column: column.clone(),
row_index: row_idx,
})?;
let dt_node = NamedNode::new(datatype).map_err(|_| MappingError::InvalidObjectIri {
iri: datatype.clone(),
})?;
Ok(Object::Literal(Literal::new_typed_literal(value, dt_node)))
}
ObjectSpec::LangColumn {
column,
lang_column,
} => {
let value = row.get(column).ok_or_else(|| MappingError::MissingColumn {
column: column.clone(),
row_index: row_idx,
})?;
let lang = row
.get(lang_column)
.ok_or_else(|| MappingError::MissingColumn {
column: lang_column.clone(),
row_index: row_idx,
})?;
let lit = oxirs_core::model::Literal::new_language_tagged_literal(value, lang)
.map_err(|e| MappingError::RdfModelError(e.to_string()))?;
Ok(Object::Literal(lit))
}
ObjectSpec::LangFixed { column, lang } => {
let value = row.get(column).ok_or_else(|| MappingError::MissingColumn {
column: column.clone(),
row_index: row_idx,
})?;
let lit = oxirs_core::model::Literal::new_language_tagged_literal(value, lang)
.map_err(|e| MappingError::RdfModelError(e.to_string()))?;
Ok(Object::Literal(lit))
}
ObjectSpec::ConstantIri(iri) => {
let node = NamedNode::new(iri)
.map_err(|_| MappingError::InvalidObjectIri { iri: iri.clone() })?;
Ok(Object::NamedNode(node))
}
}
}
/// Builds one triple for `row` from an already-resolved subject and a
/// predicate-object map.
///
/// The predicate IRI is validated first, then the object is resolved with
/// `resolve_object_spec`; the subject is cloned into the resulting triple.
/// `row_idx` is used only to annotate errors.
///
/// # Errors
///
/// * [`MappingError::InvalidPredicateIri`] if `pom.predicate` is rejected by
///   `NamedNode::new`.
/// * Any error returned by `resolve_object_spec` for the object.
pub fn build_triple_from_pom(
    subject: &Subject,
    pom: &PredicateObjectMap,
    row: &Row,
    row_idx: usize,
) -> MappingResult<Triple> {
    let predicate: Predicate = match NamedNode::new(&pom.predicate) {
        Ok(node) => node.into(),
        Err(_) => {
            return Err(MappingError::InvalidPredicateIri {
                iri: pom.predicate.clone(),
            })
        }
    };

    resolve_object_spec(&pom.object_template, row, row_idx)
        .map(|object| Triple::new(subject.clone(), predicate, object))
}