use super::{reader::CsvRecord, schema::CsvwMetadata, CsvwError};
/// One RDF triple produced by `CsvwConverter::convert`, with every term held
/// as an already-serialized string.
#[derive(Debug, Clone, PartialEq)]
pub struct RdfStatement {
/// Subject term, as returned by `CsvwConverter::subject_for_row`.
pub subject: String,
/// Predicate term; `convert` fills it with an IRI in angle brackets.
pub predicate: String,
/// Object term; `convert` fills it with a literal of the form
/// `"lexical"^^<datatype-iri>`.
pub object: String,
}
/// Settings that control how `CsvwConverter` mints IRIs and numbers rows.
#[derive(Debug, Clone)]
pub struct CsvwConverterConfig {
    /// Prefix used when the converter has to mint an IRI itself: it is
    /// prepended to the row number for default subjects and to the encoded
    /// column name for default predicates.
    pub base_iri: String,
    /// Number given to the first record; each following record adds one.
    pub start_row: usize,
}

impl Default for CsvwConverterConfig {
    /// Returns a configuration with the `http://example.org/row/` prefix and
    /// rows numbered from 1.
    fn default() -> Self {
        CsvwConverterConfig {
            base_iri: String::from("http://example.org/row/"),
            start_row: 1,
        }
    }
}
/// Turns CSV records into `RdfStatement`s, guided by CSVW metadata.
pub struct CsvwConverter {
/// IRI prefix and row-numbering settings applied during conversion.
config: CsvwConverterConfig,
}
impl CsvwConverter {
    /// Creates a converter that uses `config` for IRI minting and row numbering.
    pub fn new(config: CsvwConverterConfig) -> Self {
        Self { config }
    }

    /// Converts `records` into RDF statements, one per emitted cell.
    ///
    /// For every record a subject is built with [`Self::subject_for_row`];
    /// each header is then paired with the field at the same position.
    /// Columns whose metadata definition has `suppress_output` set are
    /// skipped. The predicate comes from the column definition when the
    /// metadata has one for the header, otherwise it is
    /// `<base_iri + percent-encoded header>`. The object is a literal typed
    /// with the column's datatype, or `xsd:string` when none is declared.
    ///
    /// Rows are numbered `config.start_row + position in records`.
    ///
    /// # Errors
    ///
    /// Returns [`CsvwError::ConversionError`] at the first record whose field
    /// count differs from `headers.len()`; no statements are returned then.
    pub fn convert(
        &self,
        headers: &[String],
        records: &[CsvRecord],
        metadata: &CsvwMetadata,
    ) -> Result<Vec<RdfStatement>, CsvwError> {
        // At most one statement per cell. If the product overflows, skip the
        // pre-allocation instead of asking for an impossible capacity.
        let capacity = records.len().checked_mul(headers.len()).unwrap_or(0);
        let mut statements = Vec::with_capacity(capacity);

        for (row_idx, record) in records.iter().enumerate() {
            let row_number = self.config.start_row + row_idx;
            if record.fields.len() != headers.len() {
                return Err(CsvwError::ConversionError(format!(
                    "row {} has {} fields but header has {}",
                    row_number,
                    record.fields.len(),
                    headers.len(),
                )));
            }

            let subject = self.subject_for_row(row_number, headers, record, metadata);

            // The length check above guarantees the zip covers every column.
            for (header, raw_value) in headers.iter().zip(record.fields.iter()) {
                let col_def = metadata.column(header);
                if matches!(col_def, Some(col) if col.suppress_output) {
                    continue;
                }
                let predicate = match col_def {
                    Some(col) => self.predicate_for_column(col),
                    None => format!("<{}{}>", self.config.base_iri, url_encode(header)),
                };
                let datatype = col_def.and_then(|c| c.datatype.as_deref());
                let object = self.object_for_value(raw_value, datatype);
                statements.push(RdfStatement {
                    subject: subject.clone(),
                    predicate,
                    object,
                });
            }
        }
        Ok(statements)
    }

    /// Builds the subject term for one row.
    ///
    /// Without `metadata.about_url` the subject is `<base_iri + row_index>`.
    /// With a template, every `{header}` placeholder is replaced by the
    /// percent-encoded field at that header's position; headers beyond the
    /// end of the record are left unexpanded.
    ///
    /// An expansion that already starts with `<` (bracketed IRI) or `_:`
    /// (blank node label) is returned verbatim. Anything else is wrapped in
    /// angle brackets, so IRIs with schemes other than `http`/`https` also
    /// come out as IRI terms rather than bare text, matching how
    /// [`Self::predicate_for_column`] treats property URLs.
    pub(crate) fn subject_for_row(
        &self,
        row_index: usize,
        headers: &[String],
        record: &CsvRecord,
        metadata: &CsvwMetadata,
    ) -> String {
        match &metadata.about_url {
            Some(template) => {
                let mut expanded = template.clone();
                // `zip` stops at the shorter side, so headers with no
                // matching field are skipped.
                for (header, value) in headers.iter().zip(record.fields.iter()) {
                    let placeholder = format!("{{{header}}}");
                    // Only encode the value when the template actually uses it.
                    if expanded.contains(&placeholder) {
                        expanded = expanded.replace(&placeholder, &url_encode(value));
                    }
                }
                if expanded.starts_with('<') || expanded.starts_with("_:") {
                    expanded
                } else {
                    format!("<{expanded}>")
                }
            }
            None => format!("<{}{}>", self.config.base_iri, row_index),
        }
    }

    /// Builds the predicate term for a column definition.
    ///
    /// A `property_url` that already starts with `<` is used verbatim; any
    /// other `property_url` is wrapped in angle brackets. Without a
    /// `property_url` the predicate is `<base_iri + percent-encoded name>`.
    pub(crate) fn predicate_for_column(&self, col: &super::schema::ColumnDef) -> String {
        match &col.property_url {
            Some(url) if url.starts_with('<') => url.clone(),
            Some(url) => format!("<{url}>"),
            None => format!("<{}{}>", self.config.base_iri, url_encode(&col.name)),
        }
    }

    /// Serializes a cell value as a typed literal `"escaped"^^<datatype>`.
    ///
    /// `value` is escaped with `escape_literal`. A `datatype` that already
    /// starts with `<` is appended verbatim, any other datatype is wrapped in
    /// angle brackets, and `None` falls back to the XML Schema `string` IRI.
    pub(crate) fn object_for_value(&self, value: &str, datatype: Option<&str>) -> String {
        let escaped = escape_literal(value);
        match datatype {
            Some(dt) => {
                let dt_iri = if dt.starts_with('<') {
                    dt.to_owned()
                } else {
                    format!("<{dt}>")
                };
                format!("\"{escaped}\"^^{dt_iri}")
            }
            None => format!("\"{escaped}\"^^<http://www.w3.org/2001/XMLSchema#string>"),
        }
    }
}
/// Percent-encodes `s` byte by byte for embedding in an IRI.
///
/// ASCII letters, digits and the punctuation listed in `PASSTHROUGH` are
/// copied unchanged; every other byte (including each byte of a multi-byte
/// UTF-8 sequence) becomes `%` followed by two uppercase hex digits.
fn url_encode(s: &str) -> String {
    // Punctuation that is copied through without encoding.
    const PASSTHROUGH: &[u8] = b"-_.~:/@!$&'()*+,;=";

    let mut encoded = String::with_capacity(s.len());
    for &byte in s.as_bytes() {
        if byte.is_ascii_alphanumeric() || PASSTHROUGH.contains(&byte) {
            encoded.push(char::from(byte));
        } else {
            let high_nibble = usize::from(byte >> 4);
            let low_nibble = usize::from(byte & 0x0F);
            encoded.push('%');
            encoded.push(char::from(HEX_CHARS[high_nibble]));
            encoded.push(char::from(HEX_CHARS[low_nibble]));
        }
    }
    encoded
}

/// Uppercase hexadecimal digits, indexed by nibble value.
const HEX_CHARS: &[u8] = b"0123456789ABCDEF";
/// Escapes `s` for use inside a double-quoted RDF literal.
///
/// Double quote, backslash, line feed, carriage return and tab are replaced
/// by their two-character backslash sequences; all other characters are
/// copied unchanged.
fn escape_literal(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for c in s.chars() {
        // Look up the escape sequence first, then append either it or the
        // character itself.
        let sequence = match c {
            '"' => Some("\\\""),
            '\\' => Some("\\\\"),
            '\n' => Some("\\n"),
            '\r' => Some("\\r"),
            '\t' => Some("\\t"),
            _ => None,
        };
        match sequence {
            Some(seq) => escaped.push_str(seq),
            None => escaped.push(c),
        }
    }
    escaped
}