Skip to main content

oxirs_ttl/mapping/
mapping_types.rs

1//! Core data types for RML mapping: errors, rows, templates, specs, rules.
2
3use std::collections::HashMap;
4use std::fmt;
5
6use oxirs_core::model::{NamedNode, Object, Predicate, Subject, Triple};
7use thiserror::Error;
8
9// ─── Error Types ─────────────────────────────────────────────────────────────
10
11/// Errors that can occur during RML mapping operations
12#[derive(Debug, Error)]
13pub enum MappingError {
14    /// A required column was not found in the data row
15    #[error("Missing column '{column}' in row {row_index}")]
16    MissingColumn {
17        /// Column name that was not found
18        column: String,
19        /// Zero-based index of the row where the error occurred
20        row_index: usize,
21    },
22
23    /// A template contained an unresolvable reference
24    #[error("Template '{template}' references unknown column '{column}' in row {row_index}")]
25    UnresolvableTemplate {
26        /// The template pattern string
27        template: String,
28        /// Column name referenced in the template
29        column: String,
30        /// Zero-based index of the row
31        row_index: usize,
32    },
33
34    /// An IRI generated from a template was syntactically invalid
35    #[error("Invalid IRI generated from template '{template}': '{iri}'")]
36    InvalidIri {
37        /// The template pattern
38        template: String,
39        /// The invalid IRI that was generated
40        iri: String,
41    },
42
43    /// A predicate IRI was syntactically invalid
44    #[error("Invalid predicate IRI: '{iri}'")]
45    InvalidPredicateIri {
46        /// The invalid IRI
47        iri: String,
48    },
49
50    /// An object IRI was syntactically invalid
51    #[error("Invalid object IRI: '{iri}'")]
52    InvalidObjectIri {
53        /// The invalid IRI
54        iri: String,
55    },
56
57    /// JSON parsing failed
58    #[error("JSON parse error: {message}")]
59    JsonParseError {
60        /// Human-readable description of the parse failure
61        message: String,
62    },
63
64    /// CSV parsing failed
65    #[error("CSV parse error at line {line}: {message}")]
66    CsvParseError {
67        /// Line number where the error occurred (1-based)
68        line: usize,
69        /// Human-readable description of the parse failure
70        message: String,
71    },
72
73    /// A JSON path expression was invalid or matched no data
74    #[error("JSON path '{path}' did not match any array in the document")]
75    JsonPathNoMatch {
76        /// The JSON path that failed to match
77        path: String,
78    },
79
80    /// No rows could be extracted from the data source
81    #[error("Data source produced no rows")]
82    EmptyDataSource,
83
84    /// Core RDF model error
85    #[error("RDF model error: {0}")]
86    RdfModelError(String),
87}
88
89/// Convenience type alias for mapping results
90pub type MappingResult<T> = Result<T, MappingError>;
91
92// ─── Core Data Types ──────────────────────────────────────────────────────────
93
/// One input record: column (field) names mapped to their string values.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Row {
    /// Column name to value, for every column present in this record
    pub values: HashMap<String, String>,
}

impl Row {
    /// Build a row that has no columns.
    pub fn new() -> Self {
        Self::default()
    }

    /// Build a row from `(column, value)` pairs.
    ///
    /// When the same column appears more than once, the last value wins.
    pub fn from_pairs(pairs: impl IntoIterator<Item = (String, String)>) -> Self {
        let mut values = HashMap::new();
        values.extend(pairs);
        Self { values }
    }

    /// Look up the value stored under `column`, if the column is present.
    pub fn get(&self, column: &str) -> Option<&str> {
        let value = self.values.get(column)?;
        Some(value.as_str())
    }

    /// Report whether `column` is present; an empty value still counts.
    pub fn contains(&self, column: &str) -> bool {
        self.get(column).is_some()
    }

    /// Iterate over every `(column, value)` pair, in the map's arbitrary order.
    pub fn iter(&self) -> impl Iterator<Item = (&str, &str)> {
        self.values
            .iter()
            .map(|(name, value)| (name.as_str(), value.as_str()))
    }
}

impl Default for Row {
    fn default() -> Self {
        Row {
            values: HashMap::new(),
        }
    }
}

impl fmt::Display for Row {
    /// Writes the row as `{col: value, ...}` with columns in ascending name
    /// order, so the text does not depend on hash-map iteration order.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Column names are unique map keys, so ordering the pairs as tuples
        // is the same as ordering them by column name alone.
        let mut pairs: Vec<(&str, &str)> = self.iter().collect();
        pairs.sort_unstable();

        f.write_str("{")?;
        let mut separator = "";
        for (column, value) in pairs {
            write!(f, "{separator}{column}: {value}")?;
            separator = ", ";
        }
        f.write_str("}")
    }
}
152
153// ─── Template ────────────────────────────────────────────────────────────────
154
155/// A URI template that can be rendered using row values.
156///
157/// Template syntax: literal text with `{column_name}` placeholders.
158/// Column references are URL-percent-encoded to produce valid IRIs.
159///
160/// # Example
161///
162/// ```
163/// use oxirs_ttl::mapping::{Template, Row};
164/// use std::collections::HashMap;
165///
166/// let tpl = Template::new("http://example.org/{id}/profile");
167/// let mut row = Row::new();
168/// row.values.insert("id".to_string(), "42".to_string());
169/// let iri = tpl.render(&row, 0).expect("should succeed");
170/// assert_eq!(iri, "http://example.org/42/profile");
171/// ```
172#[derive(Debug, Clone, PartialEq, Eq)]
173pub struct Template {
174    /// The raw pattern string, e.g. `"http://example.org/{column_name}"`
175    pub pattern: String,
176}
177
178impl Template {
179    /// Create a new template from any string-like value
180    pub fn new(pattern: impl Into<String>) -> Self {
181        Self {
182            pattern: pattern.into(),
183        }
184    }
185
186    /// Render the template by substituting `{column}` placeholders with
187    /// percent-encoded values from `row`.
188    ///
189    /// Returns an error if a referenced column is absent from the row.
190    pub fn render(&self, row: &Row, row_index: usize) -> MappingResult<String> {
191        let mut output = String::with_capacity(self.pattern.len() + 32);
192        let mut chars = self.pattern.chars().peekable();
193
194        while let Some(ch) = chars.next() {
195            if ch == '{' {
196                // Collect column name until '}'
197                let mut col_name = String::new();
198                let mut closed = false;
199                for inner in chars.by_ref() {
200                    if inner == '}' {
201                        closed = true;
202                        break;
203                    }
204                    col_name.push(inner);
205                }
206                if !closed {
207                    // Treat un-closed brace as literal text
208                    output.push('{');
209                    output.push_str(&col_name);
210                    continue;
211                }
212                let value =
213                    row.get(&col_name)
214                        .ok_or_else(|| MappingError::UnresolvableTemplate {
215                            template: self.pattern.clone(),
216                            column: col_name.clone(),
217                            row_index,
218                        })?;
219                percent_encode_path(value, &mut output);
220            } else {
221                output.push(ch);
222            }
223        }
224        Ok(output)
225    }
226}
227
228impl fmt::Display for Template {
229    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
230        f.write_str(&self.pattern)
231    }
232}
233
234/// Percent-encode characters that are illegal in IRI paths.
235/// RFC 3986 unreserved chars and common path chars are kept as-is.
236pub(crate) fn percent_encode_path(input: &str, out: &mut String) {
237    for byte in input.bytes() {
238        match byte {
239            // RFC 3986 unreserved characters
240            b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'_' | b'.' | b'~' => {
241                out.push(byte as char);
242            }
243            // Additional IRI-safe characters
244            b':' | b'@' | b'!' | b'$' | b'&' | b'\'' | b'(' | b')' | b'*' | b'+' | b',' | b';'
245            | b'=' => {
246                out.push(byte as char);
247            }
248            _ => {
249                out.push('%');
250                out.push(hex_nibble(byte >> 4));
251                out.push(hex_nibble(byte & 0x0F));
252            }
253        }
254    }
255}
256
/// Convert a 4-bit value (`0..=15`) to its uppercase hexadecimal digit.
///
/// Values above 15 are not valid nibbles; they map to `'0'` rather than
/// panicking.
#[inline]
pub(crate) fn hex_nibble(n: u8) -> char {
    const DIGITS: &[u8; 16] = b"0123456789ABCDEF";
    DIGITS
        .get(usize::from(n))
        .map_or('0', |&digit| char::from(digit))
}
265
266// ─── ObjectSpec ──────────────────────────────────────────────────────────────
267
268/// Specifies how the object of a triple should be produced from a data row
269#[derive(Debug, Clone, PartialEq, Eq)]
270pub enum ObjectSpec {
271    /// Generate an IRI by rendering a template against the row
272    Template(Template),
273
274    /// Use the column value as a plain string literal (`xsd:string`)
275    Column(String),
276
277    /// Use a constant string — always the same value regardless of the row
278    Constant(String),
279
280    /// Use the column value as a typed literal
281    TypedColumn {
282        /// Column name containing the lexical value
283        column: String,
284        /// Full XSD datatype IRI (e.g. `"http://www.w3.org/2001/XMLSchema#integer"`)
285        datatype: String,
286    },
287
288    /// Use the column value as a language-tagged literal; the language tag is
289    /// taken from another column.
290    LangColumn {
291        /// Column name containing the literal text
292        column: String,
293        /// Column name containing the BCP 47 language tag (e.g. `"en"`)
294        lang_column: String,
295    },
296
297    /// Use the column value as a language-tagged literal with a fixed language
298    LangFixed {
299        /// Column name containing the literal text
300        column: String,
301        /// BCP 47 language tag (e.g. `"en"`)
302        lang: String,
303    },
304
305    /// Use a constant IRI value (no template substitution)
306    ConstantIri(String),
307}
308
309// ─── PredicateObjectMap ───────────────────────────────────────────────────────
310
311/// Pairs a predicate IRI with an [`ObjectSpec`] to produce a single
312/// predicate-object component of a triple.
313#[derive(Debug, Clone, PartialEq, Eq)]
314pub struct PredicateObjectMap {
315    /// Full IRI for the predicate
316    pub predicate: String,
317    /// Specification for how to produce the object
318    pub object_template: ObjectSpec,
319}
320
321impl PredicateObjectMap {
322    /// Create a new predicate-object map
323    pub fn new(predicate: impl Into<String>, object_template: ObjectSpec) -> Self {
324        Self {
325            predicate: predicate.into(),
326            object_template,
327        }
328    }
329}
330
331// ─── DataSource ───────────────────────────────────────────────────────────────
332
/// Where the rows to be mapped to RDF come from.
#[derive(Debug, Clone)]
pub enum DataSource {
    /// Delimiter-separated text
    Csv {
        /// The unparsed CSV text
        content: String,
        /// Character separating fields (usually `','`)
        delimiter: char,
    },

    /// JSON text, optionally narrowed to an array inside the document
    Json {
        /// The unparsed JSON text
        content: String,
        /// Dot-separated path to the array of objects (e.g. `"results.bindings"`),
        /// if one is given
        json_path: Option<String>,
    },

    /// Rows that have already been parsed and are handed over directly
    InlineValues {
        /// One inner vector of string values per row
        rows: Vec<Vec<String>>,
        /// Column names, matched to each row's values by position
        headers: Vec<String>,
    },
}
360
361// ─── MappingRule ──────────────────────────────────────────────────────────────
362
363/// A complete mapping rule that produces RDF triples from a [`DataSource`]
364#[derive(Debug, Clone)]
365pub struct MappingRule {
366    /// Human-readable name for this rule (used in error messages)
367    pub name: String,
368    /// The data source to read rows from
369    pub source: DataSource,
370    /// Template for generating the subject IRI of each triple
371    pub subject_template: Template,
372    /// List of predicate-object pairs to generate for each row
373    pub predicate_object_maps: Vec<PredicateObjectMap>,
374    /// Optional named graph to assign all generated triples to
375    pub graph_name: Option<String>,
376}
377
378impl MappingRule {
379    /// Create a minimal mapping rule (use [`MappingRuleBuilder`] for ergonomic construction)
380    pub fn new(name: impl Into<String>, source: DataSource, subject_template: Template) -> Self {
381        Self {
382            name: name.into(),
383            source,
384            subject_template,
385            predicate_object_maps: Vec::new(),
386            graph_name: None,
387        }
388    }
389
390    /// Add a predicate-object map to this rule
391    pub fn add_predicate_object_map(&mut self, pom: PredicateObjectMap) {
392        self.predicate_object_maps.push(pom);
393    }
394}
395
396// ─── Object resolution helper (shared with engine) ────────────────────────────
397
398/// Resolve an [`ObjectSpec`] against a data row to produce an RDF [`Object`].
399///
400/// This is a free function so that both `MappingEngine` (in `mapping_transformers`)
401/// and any future code can reuse the logic without duplicating it.
402pub fn resolve_object_spec(spec: &ObjectSpec, row: &Row, row_idx: usize) -> MappingResult<Object> {
403    use oxirs_core::model::Literal;
404    match spec {
405        ObjectSpec::Template(tpl) => {
406            let iri = tpl.render(row, row_idx)?;
407            let node = NamedNode::new(&iri)
408                .map_err(|_| MappingError::InvalidObjectIri { iri: iri.clone() })?;
409            Ok(Object::NamedNode(node))
410        }
411
412        ObjectSpec::Column(col) => {
413            let value = row.get(col).ok_or_else(|| MappingError::MissingColumn {
414                column: col.clone(),
415                row_index: row_idx,
416            })?;
417            Ok(Object::Literal(Literal::new(value)))
418        }
419
420        ObjectSpec::Constant(value) => Ok(Object::Literal(Literal::new(value))),
421
422        ObjectSpec::TypedColumn { column, datatype } => {
423            let value = row.get(column).ok_or_else(|| MappingError::MissingColumn {
424                column: column.clone(),
425                row_index: row_idx,
426            })?;
427            let dt_node = NamedNode::new(datatype).map_err(|_| MappingError::InvalidObjectIri {
428                iri: datatype.clone(),
429            })?;
430            Ok(Object::Literal(Literal::new_typed_literal(value, dt_node)))
431        }
432
433        ObjectSpec::LangColumn {
434            column,
435            lang_column,
436        } => {
437            let value = row.get(column).ok_or_else(|| MappingError::MissingColumn {
438                column: column.clone(),
439                row_index: row_idx,
440            })?;
441            let lang = row
442                .get(lang_column)
443                .ok_or_else(|| MappingError::MissingColumn {
444                    column: lang_column.clone(),
445                    row_index: row_idx,
446                })?;
447            let lit = oxirs_core::model::Literal::new_language_tagged_literal(value, lang)
448                .map_err(|e| MappingError::RdfModelError(e.to_string()))?;
449            Ok(Object::Literal(lit))
450        }
451
452        ObjectSpec::LangFixed { column, lang } => {
453            let value = row.get(column).ok_or_else(|| MappingError::MissingColumn {
454                column: column.clone(),
455                row_index: row_idx,
456            })?;
457            let lit = oxirs_core::model::Literal::new_language_tagged_literal(value, lang)
458                .map_err(|e| MappingError::RdfModelError(e.to_string()))?;
459            Ok(Object::Literal(lit))
460        }
461
462        ObjectSpec::ConstantIri(iri) => {
463            let node = NamedNode::new(iri)
464                .map_err(|_| MappingError::InvalidObjectIri { iri: iri.clone() })?;
465            Ok(Object::NamedNode(node))
466        }
467    }
468}
469
470/// Build a single [`Triple`] from its components and a predicate-object map.
471pub fn build_triple_from_pom(
472    subject: &Subject,
473    pom: &PredicateObjectMap,
474    row: &Row,
475    row_idx: usize,
476) -> MappingResult<Triple> {
477    let pred_node =
478        NamedNode::new(&pom.predicate).map_err(|_| MappingError::InvalidPredicateIri {
479            iri: pom.predicate.clone(),
480        })?;
481    let predicate: Predicate = pred_node.into();
482    let object = resolve_object_spec(&pom.object_template, row, row_idx)?;
483    Ok(Triple::new(subject.clone(), predicate, object))
484}