1use std::collections::HashMap;
4use std::fmt;
5
6use oxirs_core::model::{NamedNode, Object, Predicate, Subject, Triple};
7use thiserror::Error;
8
/// Errors produced while mapping input rows to RDF terms and triples.
///
/// The `Display` text of every variant is the message given in its `#[error]`
/// attribute (generated by the `thiserror` derive).
#[derive(Debug, Error)]
pub enum MappingError {
    /// A column required by an object specification is not present in the row.
    #[error("Missing column '{column}' in row {row_index}")]
    MissingColumn {
        /// Name of the column that was looked up.
        column: String,
        /// Index of the row being processed, as supplied by the caller.
        row_index: usize,
    },

    /// A template placeholder names a column that the row does not contain.
    #[error("Template '{template}' references unknown column '{column}' in row {row_index}")]
    UnresolvableTemplate {
        /// The full template pattern that was being rendered.
        template: String,
        /// The placeholder name that could not be resolved.
        column: String,
        /// Index of the row being processed, as supplied by the caller.
        row_index: usize,
    },

    /// A rendered template did not form a valid IRI.
    #[error("Invalid IRI generated from template '{template}': '{iri}'")]
    InvalidIri {
        /// The template pattern that produced the IRI.
        template: String,
        /// The rejected IRI text.
        iri: String,
    },

    /// The predicate string of a predicate-object map is not a valid IRI.
    #[error("Invalid predicate IRI: '{iri}'")]
    InvalidPredicateIri {
        /// The rejected IRI text.
        iri: String,
    },

    /// An IRI used while building an object term was rejected. `resolve_object_spec`
    /// also reports an invalid datatype IRI with this variant.
    #[error("Invalid object IRI: '{iri}'")]
    InvalidObjectIri {
        /// The rejected IRI text.
        iri: String,
    },

    /// A JSON document could not be parsed.
    #[error("JSON parse error: {message}")]
    JsonParseError {
        /// Description of the parse failure.
        message: String,
    },

    /// A CSV document could not be parsed.
    #[error("CSV parse error at line {line}: {message}")]
    CsvParseError {
        /// Line at which the problem was detected (numbering base is set by the
        /// parser, which is not part of this section).
        line: usize,
        /// Description of the parse failure.
        message: String,
    },

    /// A JSON path did not select any array in the document.
    #[error("JSON path '{path}' did not match any array in the document")]
    JsonPathNoMatch {
        /// The path expression that failed to match.
        path: String,
    },

    /// The data source yielded no rows at all.
    #[error("Data source produced no rows")]
    EmptyDataSource,

    /// An error reported by the RDF model layer, carried as its message text
    /// (for example a rejected language-tagged literal).
    #[error("RDF model error: {0}")]
    RdfModelError(String),
}
88
/// Shorthand for a `Result` whose error type is [`MappingError`].
pub type MappingResult<T> = Result<T, MappingError>;
91
/// One record of input data, stored as a map from column name to cell text.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Row {
    /// Cell values keyed by column name.
    pub values: HashMap<String, String>,
}

impl Row {
    /// Creates a row that has no columns.
    pub fn new() -> Self {
        Row {
            values: HashMap::new(),
        }
    }

    /// Builds a row from an iterator of `(column, value)` pairs.
    pub fn from_pairs(pairs: impl IntoIterator<Item = (String, String)>) -> Self {
        let mut values = HashMap::new();
        values.extend(pairs);
        Row { values }
    }

    /// Returns the value stored under `column`, or `None` if the column is absent.
    pub fn get(&self, column: &str) -> Option<&str> {
        let cell = self.values.get(column)?;
        Some(cell.as_str())
    }

    /// Reports whether the row has an entry for `column`.
    pub fn contains(&self, column: &str) -> bool {
        self.values.contains_key(column)
    }

    /// Iterates over `(column, value)` pairs in the map's (unspecified) order.
    pub fn iter(&self) -> impl Iterator<Item = (&str, &str)> {
        self.values
            .iter()
            .map(|(column, cell)| (column.as_str(), cell.as_str()))
    }
}

impl Default for Row {
    /// Same as [`Row::new`]: an empty row.
    fn default() -> Self {
        Row::new()
    }
}

impl fmt::Display for Row {
    /// Writes the row as `{col: value, ...}` with columns sorted by name so the
    /// output does not depend on hash-map iteration order.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let mut columns: Vec<(&str, &str)> = self.iter().collect();
        // Column names are unique map keys, so an unstable sort is deterministic.
        columns.sort_unstable_by_key(|&(name, _)| name);

        f.write_str("{")?;
        let mut separator = "";
        for (name, value) in columns {
            write!(f, "{separator}{name}: {value}")?;
            separator = ", ";
        }
        f.write_str("}")
    }
}
152
153#[derive(Debug, Clone, PartialEq, Eq)]
173pub struct Template {
174 pub pattern: String,
176}
177
178impl Template {
179 pub fn new(pattern: impl Into<String>) -> Self {
181 Self {
182 pattern: pattern.into(),
183 }
184 }
185
186 pub fn render(&self, row: &Row, row_index: usize) -> MappingResult<String> {
191 let mut output = String::with_capacity(self.pattern.len() + 32);
192 let mut chars = self.pattern.chars().peekable();
193
194 while let Some(ch) = chars.next() {
195 if ch == '{' {
196 let mut col_name = String::new();
198 let mut closed = false;
199 for inner in chars.by_ref() {
200 if inner == '}' {
201 closed = true;
202 break;
203 }
204 col_name.push(inner);
205 }
206 if !closed {
207 output.push('{');
209 output.push_str(&col_name);
210 continue;
211 }
212 let value =
213 row.get(&col_name)
214 .ok_or_else(|| MappingError::UnresolvableTemplate {
215 template: self.pattern.clone(),
216 column: col_name.clone(),
217 row_index,
218 })?;
219 percent_encode_path(value, &mut output);
220 } else {
221 output.push(ch);
222 }
223 }
224 Ok(output)
225 }
226}
227
228impl fmt::Display for Template {
229 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
230 f.write_str(&self.pattern)
231 }
232}
233
/// Appends `input` to `out`, percent-encoding every byte that is not allowed
/// unescaped inside a single URI path segment.
///
/// Bytes left as-is are the RFC 3986 unreserved characters (ASCII letters,
/// digits, `-`, `_`, `.`, `~`), the sub-delimiters (`!$&'()*+,;=`) and `:` / `@`.
/// Every other byte — including `/`, `%`, space and each byte of a multi-byte
/// UTF-8 sequence — becomes `%XX` with uppercase hex digits.
pub(crate) fn percent_encode_path(input: &str, out: &mut String) {
    for &octet in input.as_bytes() {
        let unreserved =
            octet.is_ascii_alphanumeric() || matches!(octet, b'-' | b'_' | b'.' | b'~');
        let allowed_delimiter = matches!(
            octet,
            b':' | b'@'
                | b'!'
                | b'$'
                | b'&'
                | b'\''
                | b'('
                | b')'
                | b'*'
                | b'+'
                | b','
                | b';'
                | b'='
        );

        if unreserved || allowed_delimiter {
            out.push(char::from(octet));
        } else {
            out.push('%');
            out.push(hex_nibble(octet >> 4));
            out.push(hex_nibble(octet & 0x0F));
        }
    }
}

/// Maps a 4-bit value (`0..=15`) to its uppercase hexadecimal digit.
///
/// Values above 15 are outside the intended domain and yield `'0'`.
#[inline]
pub(crate) fn hex_nibble(n: u8) -> char {
    const DIGITS: &[u8; 16] = b"0123456789ABCDEF";
    DIGITS
        .get(usize::from(n))
        .map_or('0', |&digit| char::from(digit))
}
265
/// Describes how the object term of a triple is produced for a row.
///
/// The descriptions below reflect how `resolve_object_spec` handles each variant.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ObjectSpec {
    /// Render the template against the row and use the result as an IRI.
    Template(Template),

    /// Use the value of the named column as a plain literal.
    Column(String),

    /// Use the given text as a plain literal, independent of the row.
    Constant(String),

    /// Use a column value as a typed literal.
    TypedColumn {
        /// Column that supplies the lexical value.
        column: String,
        /// Datatype IRI; it is validated when the object is resolved.
        datatype: String,
    },

    /// Use a column value as a language-tagged literal whose tag is read from
    /// another column of the same row.
    LangColumn {
        /// Column that supplies the literal text.
        column: String,
        /// Column that supplies the language tag.
        lang_column: String,
    },

    /// Use a column value as a language-tagged literal with a fixed tag.
    LangFixed {
        /// Column that supplies the literal text.
        column: String,
        /// Language tag applied to every row.
        lang: String,
    },

    /// Use the given text as an IRI, independent of the row.
    ConstantIri(String),
}
308
309#[derive(Debug, Clone, PartialEq, Eq)]
314pub struct PredicateObjectMap {
315 pub predicate: String,
317 pub object_template: ObjectSpec,
319}
320
321impl PredicateObjectMap {
322 pub fn new(predicate: impl Into<String>, object_template: ObjectSpec) -> Self {
324 Self {
325 predicate: predicate.into(),
326 object_template,
327 }
328 }
329}
330
/// The input a mapping rule reads its rows from.
///
/// NOTE(review): the code that turns a source into rows is not part of this
/// section; the field notes below follow the field names and the related
/// `MappingError` variants and should be confirmed against that code.
#[derive(Debug, Clone)]
pub enum DataSource {
    /// CSV text held in memory.
    Csv {
        /// The CSV document text.
        content: String,
        /// Character that separates fields — presumably `,`, `;` or a tab; confirm.
        delimiter: char,
    },

    /// JSON text held in memory.
    Json {
        /// The JSON document text.
        content: String,
        /// Optional path selecting the records inside the document — presumably
        /// the path reported by `MappingError::JsonPathNoMatch`; confirm.
        json_path: Option<String>,
    },

    /// Rows supplied directly as values.
    InlineValues {
        /// Data rows — presumably each inner vector lines up positionally with
        /// `headers`; TODO confirm.
        rows: Vec<Vec<String>>,
        /// Column names for the rows.
        headers: Vec<String>,
    },
}
360
361#[derive(Debug, Clone)]
365pub struct MappingRule {
366 pub name: String,
368 pub source: DataSource,
370 pub subject_template: Template,
372 pub predicate_object_maps: Vec<PredicateObjectMap>,
374 pub graph_name: Option<String>,
376}
377
378impl MappingRule {
379 pub fn new(name: impl Into<String>, source: DataSource, subject_template: Template) -> Self {
381 Self {
382 name: name.into(),
383 source,
384 subject_template,
385 predicate_object_maps: Vec::new(),
386 graph_name: None,
387 }
388 }
389
390 pub fn add_predicate_object_map(&mut self, pom: PredicateObjectMap) {
392 self.predicate_object_maps.push(pom);
393 }
394}
395
396pub fn resolve_object_spec(spec: &ObjectSpec, row: &Row, row_idx: usize) -> MappingResult<Object> {
403 use oxirs_core::model::Literal;
404 match spec {
405 ObjectSpec::Template(tpl) => {
406 let iri = tpl.render(row, row_idx)?;
407 let node = NamedNode::new(&iri)
408 .map_err(|_| MappingError::InvalidObjectIri { iri: iri.clone() })?;
409 Ok(Object::NamedNode(node))
410 }
411
412 ObjectSpec::Column(col) => {
413 let value = row.get(col).ok_or_else(|| MappingError::MissingColumn {
414 column: col.clone(),
415 row_index: row_idx,
416 })?;
417 Ok(Object::Literal(Literal::new(value)))
418 }
419
420 ObjectSpec::Constant(value) => Ok(Object::Literal(Literal::new(value))),
421
422 ObjectSpec::TypedColumn { column, datatype } => {
423 let value = row.get(column).ok_or_else(|| MappingError::MissingColumn {
424 column: column.clone(),
425 row_index: row_idx,
426 })?;
427 let dt_node = NamedNode::new(datatype).map_err(|_| MappingError::InvalidObjectIri {
428 iri: datatype.clone(),
429 })?;
430 Ok(Object::Literal(Literal::new_typed_literal(value, dt_node)))
431 }
432
433 ObjectSpec::LangColumn {
434 column,
435 lang_column,
436 } => {
437 let value = row.get(column).ok_or_else(|| MappingError::MissingColumn {
438 column: column.clone(),
439 row_index: row_idx,
440 })?;
441 let lang = row
442 .get(lang_column)
443 .ok_or_else(|| MappingError::MissingColumn {
444 column: lang_column.clone(),
445 row_index: row_idx,
446 })?;
447 let lit = oxirs_core::model::Literal::new_language_tagged_literal(value, lang)
448 .map_err(|e| MappingError::RdfModelError(e.to_string()))?;
449 Ok(Object::Literal(lit))
450 }
451
452 ObjectSpec::LangFixed { column, lang } => {
453 let value = row.get(column).ok_or_else(|| MappingError::MissingColumn {
454 column: column.clone(),
455 row_index: row_idx,
456 })?;
457 let lit = oxirs_core::model::Literal::new_language_tagged_literal(value, lang)
458 .map_err(|e| MappingError::RdfModelError(e.to_string()))?;
459 Ok(Object::Literal(lit))
460 }
461
462 ObjectSpec::ConstantIri(iri) => {
463 let node = NamedNode::new(iri)
464 .map_err(|_| MappingError::InvalidObjectIri { iri: iri.clone() })?;
465 Ok(Object::NamedNode(node))
466 }
467 }
468}
469
470pub fn build_triple_from_pom(
472 subject: &Subject,
473 pom: &PredicateObjectMap,
474 row: &Row,
475 row_idx: usize,
476) -> MappingResult<Triple> {
477 let pred_node =
478 NamedNode::new(&pom.predicate).map_err(|_| MappingError::InvalidPredicateIri {
479 iri: pom.predicate.clone(),
480 })?;
481 let predicate: Predicate = pred_node.into();
482 let object = resolve_object_spec(&pom.object_template, row, row_idx)?;
483 Ok(Triple::new(subject.clone(), predicate, object))
484}