Skip to main content

oxirs_ttl/mapping/
mod.rs

//! RML-inspired RDF Mapping Language support
//!
//! Maps non-RDF data sources (CSV, JSON, inline values) to RDF triples.
//! Inspired by the W3C RDF Mapping Language (RML) specification:
//! <https://rml.io/specs/rml/>
//!
//! # Example
//!
//! ```rust
//! use oxirs_ttl::mapping::{MappingEngine, MappingRuleBuilder, ObjectSpec};
//!
//! let csv_data = "id,name,age\n1,Alice,30\n2,Bob,25";
//!
//! let rule = MappingRuleBuilder::new("persons")
//!     .csv_source(csv_data)
//!     .subject_template("http://example.org/person/{id}")
//!     .map(
//!         "http://xmlns.com/foaf/0.1/name",
//!         ObjectSpec::Column("name".to_string()),
//!     )
//!     .map(
//!         "http://xmlns.com/foaf/0.1/age",
//!         ObjectSpec::TypedColumn {
//!             column: "age".to_string(),
//!             datatype: "http://www.w3.org/2001/XMLSchema#integer".to_string(),
//!         },
//!     )
//!     .build();
//!
//! let engine = MappingEngine::new();
//! let triples = engine.execute(&rule).expect("should succeed");
//! assert_eq!(triples.len(), 4); // 2 rows × 2 predicates
//! ```
34
35use std::collections::HashMap;
36use std::fmt;
37
38use oxirs_core::model::{Literal, NamedNode, Object, Predicate, Subject, Triple};
39use thiserror::Error;
40
41// ─── Error Types ─────────────────────────────────────────────────────────────
42
43/// Errors that can occur during RML mapping operations
44#[derive(Debug, Error)]
45pub enum MappingError {
46    /// A required column was not found in the data row
47    #[error("Missing column '{column}' in row {row_index}")]
48    MissingColumn {
49        /// Column name that was not found
50        column: String,
51        /// Zero-based index of the row where the error occurred
52        row_index: usize,
53    },
54
55    /// A template contained an unresolvable reference
56    #[error("Template '{template}' references unknown column '{column}' in row {row_index}")]
57    UnresolvableTemplate {
58        /// The template pattern string
59        template: String,
60        /// Column name referenced in the template
61        column: String,
62        /// Zero-based index of the row
63        row_index: usize,
64    },
65
66    /// An IRI generated from a template was syntactically invalid
67    #[error("Invalid IRI generated from template '{template}': '{iri}'")]
68    InvalidIri {
69        /// The template pattern
70        template: String,
71        /// The invalid IRI that was generated
72        iri: String,
73    },
74
75    /// A predicate IRI was syntactically invalid
76    #[error("Invalid predicate IRI: '{iri}'")]
77    InvalidPredicateIri {
78        /// The invalid IRI
79        iri: String,
80    },
81
82    /// An object IRI was syntactically invalid
83    #[error("Invalid object IRI: '{iri}'")]
84    InvalidObjectIri {
85        /// The invalid IRI
86        iri: String,
87    },
88
89    /// JSON parsing failed
90    #[error("JSON parse error: {message}")]
91    JsonParseError {
92        /// Human-readable description of the parse failure
93        message: String,
94    },
95
96    /// CSV parsing failed
97    #[error("CSV parse error at line {line}: {message}")]
98    CsvParseError {
99        /// Line number where the error occurred (1-based)
100        line: usize,
101        /// Human-readable description of the parse failure
102        message: String,
103    },
104
105    /// A JSON path expression was invalid or matched no data
106    #[error("JSON path '{path}' did not match any array in the document")]
107    JsonPathNoMatch {
108        /// The JSON path that failed to match
109        path: String,
110    },
111
112    /// No rows could be extracted from the data source
113    #[error("Data source produced no rows")]
114    EmptyDataSource,
115
116    /// Core RDF model error
117    #[error("RDF model error: {0}")]
118    RdfModelError(String),
119}
120
121/// Convenience type alias for mapping results
122pub type MappingResult<T> = Result<T, MappingError>;
123
124// ─── Core Data Types ──────────────────────────────────────────────────────────
125
/// One record from a data source: a lookup table from column name to the
/// cell's string value.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Row {
    /// Cell values keyed by column (field) name
    pub values: HashMap<String, String>,
}

impl Row {
    /// Build a row that has no columns
    pub fn new() -> Self {
        Row {
            values: HashMap::default(),
        }
    }

    /// Build a row from `(column, value)` pairs.
    ///
    /// When a column name occurs more than once, the last value wins.
    pub fn from_pairs(pairs: impl IntoIterator<Item = (String, String)>) -> Self {
        let mut values = HashMap::new();
        values.extend(pairs);
        Row { values }
    }

    /// Look up the value stored under `column`, if any
    pub fn get(&self, column: &str) -> Option<&str> {
        let cell = self.values.get(column)?;
        Some(cell.as_str())
    }

    /// Report whether `column` is present; an empty value still counts
    pub fn contains(&self, column: &str) -> bool {
        self.get(column).is_some()
    }

    /// Iterate over every `(column, value)` pair in unspecified order
    pub fn iter(&self) -> impl Iterator<Item = (&str, &str)> {
        self.values
            .iter()
            .map(|(name, cell)| (&name[..], &cell[..]))
    }
}

impl Default for Row {
    /// Same as [`Row::new`]: a row without columns
    fn default() -> Self {
        Row::new()
    }
}

impl fmt::Display for Row {
    /// Writes `{col: value, ...}` with the columns in ascending name order,
    /// so the output does not depend on hash-map iteration order.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let mut columns: Vec<(&String, &String)> = self.values.iter().collect();
        columns.sort_by(|left, right| left.0.cmp(right.0));

        f.write_str("{")?;
        // The separator is empty before the first entry and ", " afterwards.
        let mut separator = "";
        for (name, cell) in columns {
            write!(f, "{separator}{name}: {cell}")?;
            separator = ", ";
        }
        f.write_str("}")
    }
}
184
185// ─── Template ────────────────────────────────────────────────────────────────
186
187/// A URI template that can be rendered using row values.
188///
189/// Template syntax: literal text with `{column_name}` placeholders.
190/// Column references are URL-percent-encoded to produce valid IRIs.
191///
192/// # Example
193///
194/// ```
195/// use oxirs_ttl::mapping::{Template, Row};
196/// use std::collections::HashMap;
197///
198/// let tpl = Template::new("http://example.org/{id}/profile");
199/// let mut row = Row::new();
200/// row.values.insert("id".to_string(), "42".to_string());
201/// let iri = tpl.render(&row, 0).expect("should succeed");
202/// assert_eq!(iri, "http://example.org/42/profile");
203/// ```
204#[derive(Debug, Clone, PartialEq, Eq)]
205pub struct Template {
206    /// The raw pattern string, e.g. `"http://example.org/{column_name}"`
207    pub pattern: String,
208}
209
210impl Template {
211    /// Create a new template from any string-like value
212    pub fn new(pattern: impl Into<String>) -> Self {
213        Self {
214            pattern: pattern.into(),
215        }
216    }
217
218    /// Render the template by substituting `{column}` placeholders with
219    /// percent-encoded values from `row`.
220    ///
221    /// Returns an error if a referenced column is absent from the row.
222    pub fn render(&self, row: &Row, row_index: usize) -> MappingResult<String> {
223        let mut output = String::with_capacity(self.pattern.len() + 32);
224        let mut chars = self.pattern.chars().peekable();
225
226        while let Some(ch) = chars.next() {
227            if ch == '{' {
228                // Collect column name until '}'
229                let mut col_name = String::new();
230                let mut closed = false;
231                for inner in chars.by_ref() {
232                    if inner == '}' {
233                        closed = true;
234                        break;
235                    }
236                    col_name.push(inner);
237                }
238                if !closed {
239                    // Treat un-closed brace as literal text
240                    output.push('{');
241                    output.push_str(&col_name);
242                    continue;
243                }
244                let value =
245                    row.get(&col_name)
246                        .ok_or_else(|| MappingError::UnresolvableTemplate {
247                            template: self.pattern.clone(),
248                            column: col_name.clone(),
249                            row_index,
250                        })?;
251                percent_encode_path(value, &mut output);
252            } else {
253                output.push(ch);
254            }
255        }
256        Ok(output)
257    }
258}
259
260impl fmt::Display for Template {
261    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
262        f.write_str(&self.pattern)
263    }
264}
265
266/// Percent-encode characters that are illegal in IRI paths.
267/// RFC 3986 unreserved chars and common path chars are kept as-is.
268fn percent_encode_path(input: &str, out: &mut String) {
269    for byte in input.bytes() {
270        match byte {
271            // RFC 3986 unreserved characters
272            b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'_' | b'.' | b'~' => {
273                out.push(byte as char);
274            }
275            // Additional IRI-safe characters
276            b':' | b'@' | b'!' | b'$' | b'&' | b'\'' | b'(' | b')' | b'*' | b'+' | b',' | b';'
277            | b'=' => {
278                out.push(byte as char);
279            }
280            _ => {
281                out.push('%');
282                out.push(hex_nibble(byte >> 4));
283                out.push(hex_nibble(byte & 0x0F));
284            }
285        }
286    }
287}
288
/// Map a 4-bit value (`0..=15`) to its upper-case hexadecimal digit.
///
/// Values above 15 are outside the intended domain and yield `'0'`.
#[inline]
fn hex_nibble(n: u8) -> char {
    const DIGITS: &[u8; 16] = b"0123456789ABCDEF";
    DIGITS
        .get(usize::from(n))
        .map_or('0', |&digit| char::from(digit))
}
297
298// ─── ObjectSpec ──────────────────────────────────────────────────────────────
299
300/// Specifies how the object of a triple should be produced from a data row
301#[derive(Debug, Clone, PartialEq, Eq)]
302pub enum ObjectSpec {
303    /// Generate an IRI by rendering a template against the row
304    Template(Template),
305
306    /// Use the column value as a plain string literal (`xsd:string`)
307    Column(String),
308
309    /// Use a constant string — always the same value regardless of the row
310    Constant(String),
311
312    /// Use the column value as a typed literal
313    TypedColumn {
314        /// Column name containing the lexical value
315        column: String,
316        /// Full XSD datatype IRI (e.g. `"http://www.w3.org/2001/XMLSchema#integer"`)
317        datatype: String,
318    },
319
320    /// Use the column value as a language-tagged literal; the language tag is
321    /// taken from another column.
322    LangColumn {
323        /// Column name containing the literal text
324        column: String,
325        /// Column name containing the BCP 47 language tag (e.g. `"en"`)
326        lang_column: String,
327    },
328
329    /// Use the column value as a language-tagged literal with a fixed language
330    LangFixed {
331        /// Column name containing the literal text
332        column: String,
333        /// BCP 47 language tag (e.g. `"en"`)
334        lang: String,
335    },
336
337    /// Use a constant IRI value (no template substitution)
338    ConstantIri(String),
339}
340
341// ─── PredicateObjectMap ───────────────────────────────────────────────────────
342
343/// Pairs a predicate IRI with an [`ObjectSpec`] to produce a single
344/// predicate-object component of a triple.
345#[derive(Debug, Clone, PartialEq, Eq)]
346pub struct PredicateObjectMap {
347    /// Full IRI for the predicate
348    pub predicate: String,
349    /// Specification for how to produce the object
350    pub object_template: ObjectSpec,
351}
352
353impl PredicateObjectMap {
354    /// Create a new predicate-object map
355    pub fn new(predicate: impl Into<String>, object_template: ObjectSpec) -> Self {
356        Self {
357            predicate: predicate.into(),
358            object_template,
359        }
360    }
361}
362
363// ─── DataSource ───────────────────────────────────────────────────────────────
364
/// Where the rows of a mapping rule come from
#[derive(Debug, Clone)]
pub enum DataSource {
    /// Delimited text whose first line holds the column names
    Csv {
        /// The complete CSV text
        content: String,
        /// Character separating fields (commonly `','`)
        delimiter: char,
    },

    /// A JSON document containing an array of objects
    Json {
        /// The complete JSON text
        content: String,
        /// Dot-separated object keys leading to the array
        /// (e.g. `"results.bindings"`); `None` means the root is the array
        json_path: Option<String>,
    },

    /// Rows handed over already split into cells
    InlineValues {
        /// Cell values, one inner vector per row
        rows: Vec<Vec<String>>,
        /// Column names, matched to the cells of each row by position
        headers: Vec<String>,
    },
}
392
393// ─── MappingRule ──────────────────────────────────────────────────────────────
394
395/// A complete mapping rule that produces RDF triples from a [`DataSource`]
396#[derive(Debug, Clone)]
397pub struct MappingRule {
398    /// Human-readable name for this rule (used in error messages)
399    pub name: String,
400    /// The data source to read rows from
401    pub source: DataSource,
402    /// Template for generating the subject IRI of each triple
403    pub subject_template: Template,
404    /// List of predicate-object pairs to generate for each row
405    pub predicate_object_maps: Vec<PredicateObjectMap>,
406    /// Optional named graph to assign all generated triples to
407    pub graph_name: Option<String>,
408}
409
410impl MappingRule {
411    /// Create a minimal mapping rule (use [`MappingRuleBuilder`] for ergonomic construction)
412    pub fn new(name: impl Into<String>, source: DataSource, subject_template: Template) -> Self {
413        Self {
414            name: name.into(),
415            source,
416            subject_template,
417            predicate_object_maps: Vec::new(),
418            graph_name: None,
419        }
420    }
421
422    /// Add a predicate-object map to this rule
423    pub fn add_predicate_object_map(&mut self, pom: PredicateObjectMap) {
424        self.predicate_object_maps.push(pom);
425    }
426}
427
428// ─── MappingEngine ────────────────────────────────────────────────────────────
429
/// Runs [`MappingRule`]s and yields the resulting RDF [`Triple`]s.
///
/// The engine holds a single error-handling flag and is cheap to create or
/// clone. Everything about *what* to map lives in the rules passed to it.
#[derive(Debug, Default, Clone)]
pub struct MappingEngine {
    /// `true`: drop rows/triples that fail to map and keep going;
    /// `false` (the default): abort on the first mapping error
    pub skip_errors: bool,
}
439
440impl MappingEngine {
441    /// Create a new mapping engine with default settings (fail-fast)
442    pub fn new() -> Self {
443        Self::default()
444    }
445
446    /// Create an engine that silently skips rows that produce mapping errors
447    pub fn new_lenient() -> Self {
448        Self { skip_errors: true }
449    }
450
451    /// Execute a single mapping rule and return all produced triples
452    pub fn execute(&self, rule: &MappingRule) -> MappingResult<Vec<Triple>> {
453        let (headers, rows) = self.extract_rows(&rule.source)?;
454        let _ = headers; // headers are embedded inside each Row already
455        self.map_rows(rule, &rows)
456    }
457
458    /// Execute multiple rules and concatenate all produced triples
459    pub fn execute_all(&self, rules: &[MappingRule]) -> MappingResult<Vec<Triple>> {
460        let mut all_triples = Vec::new();
461        for rule in rules {
462            let mut triples = self.execute(rule)?;
463            all_triples.append(&mut triples);
464        }
465        Ok(all_triples)
466    }
467
468    // ─── Internal helpers ────────────────────────────────────────────────
469
470    fn extract_rows(&self, source: &DataSource) -> MappingResult<(Vec<String>, Vec<Row>)> {
471        match source {
472            DataSource::Csv { content, delimiter } => Self::parse_csv(content, *delimiter),
473            DataSource::Json { content, json_path } => {
474                let rows = Self::parse_json(content, json_path.as_deref())?;
475                // headers are implicit in the Row keys; return empty list
476                Ok((Vec::new(), rows))
477            }
478            DataSource::InlineValues { rows, headers } => {
479                let parsed_rows: Vec<Row> = rows
480                    .iter()
481                    .map(|row_values| {
482                        let pairs = headers
483                            .iter()
484                            .zip(row_values.iter())
485                            .map(|(h, v)| (h.clone(), v.clone()));
486                        Row::from_pairs(pairs)
487                    })
488                    .collect();
489                Ok((headers.clone(), parsed_rows))
490            }
491        }
492    }
493
494    fn map_rows(&self, rule: &MappingRule, rows: &[Row]) -> MappingResult<Vec<Triple>> {
495        let mut triples = Vec::with_capacity(rows.len() * rule.predicate_object_maps.len());
496
497        for (row_idx, row) in rows.iter().enumerate() {
498            // Generate subject IRI
499            let subject_iri = match rule.subject_template.render(row, row_idx) {
500                Ok(iri) => iri,
501                Err(e) => {
502                    if self.skip_errors {
503                        continue;
504                    }
505                    return Err(e);
506                }
507            };
508
509            let subject_node =
510                NamedNode::new(&subject_iri).map_err(|e| MappingError::InvalidIri {
511                    template: rule.subject_template.pattern.clone(),
512                    iri: format!("{subject_iri} ({e})"),
513                })?;
514            let subject: Subject = subject_node.into();
515
516            // Generate one triple per predicate-object map
517            for pom in &rule.predicate_object_maps {
518                let result = self.build_triple(&subject, pom, row, row_idx);
519                match result {
520                    Ok(triple) => triples.push(triple),
521                    Err(e) => {
522                        if self.skip_errors {
523                            continue;
524                        }
525                        return Err(e);
526                    }
527                }
528            }
529        }
530        Ok(triples)
531    }
532
533    fn build_triple(
534        &self,
535        subject: &Subject,
536        pom: &PredicateObjectMap,
537        row: &Row,
538        row_idx: usize,
539    ) -> MappingResult<Triple> {
540        // Build predicate
541        let pred_node =
542            NamedNode::new(&pom.predicate).map_err(|_| MappingError::InvalidPredicateIri {
543                iri: pom.predicate.clone(),
544            })?;
545        let predicate: Predicate = pred_node.into();
546
547        // Build object
548        let object = self.resolve_object(&pom.object_template, row, row_idx)?;
549
550        Ok(Triple::new(subject.clone(), predicate, object))
551    }
552
553    fn resolve_object(
554        &self,
555        spec: &ObjectSpec,
556        row: &Row,
557        row_idx: usize,
558    ) -> MappingResult<Object> {
559        match spec {
560            ObjectSpec::Template(tpl) => {
561                let iri = tpl.render(row, row_idx)?;
562                let node = NamedNode::new(&iri)
563                    .map_err(|_| MappingError::InvalidObjectIri { iri: iri.clone() })?;
564                Ok(Object::NamedNode(node))
565            }
566
567            ObjectSpec::Column(col) => {
568                let value = row.get(col).ok_or_else(|| MappingError::MissingColumn {
569                    column: col.clone(),
570                    row_index: row_idx,
571                })?;
572                Ok(Object::Literal(Literal::new(value)))
573            }
574
575            ObjectSpec::Constant(value) => Ok(Object::Literal(Literal::new(value))),
576
577            ObjectSpec::TypedColumn { column, datatype } => {
578                let value = row.get(column).ok_or_else(|| MappingError::MissingColumn {
579                    column: column.clone(),
580                    row_index: row_idx,
581                })?;
582                let dt_node =
583                    NamedNode::new(datatype).map_err(|_| MappingError::InvalidObjectIri {
584                        iri: datatype.clone(),
585                    })?;
586                Ok(Object::Literal(Literal::new_typed_literal(value, dt_node)))
587            }
588
589            ObjectSpec::LangColumn {
590                column,
591                lang_column,
592            } => {
593                let value = row.get(column).ok_or_else(|| MappingError::MissingColumn {
594                    column: column.clone(),
595                    row_index: row_idx,
596                })?;
597                let lang = row
598                    .get(lang_column)
599                    .ok_or_else(|| MappingError::MissingColumn {
600                        column: lang_column.clone(),
601                        row_index: row_idx,
602                    })?;
603                let lit = Literal::new_language_tagged_literal(value, lang)
604                    .map_err(|e| MappingError::RdfModelError(e.to_string()))?;
605                Ok(Object::Literal(lit))
606            }
607
608            ObjectSpec::LangFixed { column, lang } => {
609                let value = row.get(column).ok_or_else(|| MappingError::MissingColumn {
610                    column: column.clone(),
611                    row_index: row_idx,
612                })?;
613                let lit = Literal::new_language_tagged_literal(value, lang)
614                    .map_err(|e| MappingError::RdfModelError(e.to_string()))?;
615                Ok(Object::Literal(lit))
616            }
617
618            ObjectSpec::ConstantIri(iri) => {
619                let node = NamedNode::new(iri)
620                    .map_err(|_| MappingError::InvalidObjectIri { iri: iri.clone() })?;
621                Ok(Object::NamedNode(node))
622            }
623        }
624    }
625
626    // ─── CSV parser ──────────────────────────────────────────────────────
627
628    /// Parse CSV content into (headers, rows).
629    ///
630    /// Handles:
631    /// - Configurable delimiter
632    /// - Double-quote escaping (`""` inside a quoted field)
633    /// - CRLF and LF line endings
634    /// - Quoted fields that span multiple lines
635    pub fn parse_csv(content: &str, delimiter: char) -> MappingResult<(Vec<String>, Vec<Row>)> {
636        let lines = split_csv_lines(content);
637        if lines.is_empty() {
638            return Ok((Vec::new(), Vec::new()));
639        }
640
641        // Parse header row
642        let headers = parse_csv_line(&lines[0], delimiter);
643        if headers.is_empty() {
644            return Err(MappingError::CsvParseError {
645                line: 1,
646                message: "empty header row".to_string(),
647            });
648        }
649
650        let mut rows = Vec::with_capacity(lines.len().saturating_sub(1));
651        for (line_idx, line) in lines.iter().enumerate().skip(1) {
652            if line.trim().is_empty() {
653                continue;
654            }
655            let values = parse_csv_line(line, delimiter);
656            if values.len() != headers.len() {
657                return Err(MappingError::CsvParseError {
658                    line: line_idx + 1,
659                    message: format!("expected {} fields but got {}", headers.len(), values.len()),
660                });
661            }
662            let row = Row::from_pairs(headers.iter().cloned().zip(values.into_iter()));
663            rows.push(row);
664        }
665        Ok((headers, rows))
666    }
667
668    // ─── JSON parser ─────────────────────────────────────────────────────
669
670    /// Parse JSON content into rows.
671    ///
672    /// Behaviour:
673    /// - If `json_path` is `None`, the root must be a JSON array of objects.
674    /// - If `json_path` is `Some("a.b.c")`, the engine traverses object keys
675    ///   `a` → `b` → `c` and expects to find an array there.
676    /// - Each array element must be a JSON object; its key-value pairs become
677    ///   the row fields (values are coerced to strings).
678    pub fn parse_json(content: &str, json_path: Option<&str>) -> MappingResult<Vec<Row>> {
679        let value: serde_json::Value =
680            serde_json::from_str(content).map_err(|e| MappingError::JsonParseError {
681                message: e.to_string(),
682            })?;
683
684        // Navigate to the target array using dot-separated path
685        let array = if let Some(path) = json_path {
686            navigate_json_path(&value, path)?
687        } else {
688            &value
689        };
690
691        let arr = array.as_array().ok_or_else(|| {
692            let path_desc = json_path.unwrap_or("<root>");
693            MappingError::JsonPathNoMatch {
694                path: path_desc.to_string(),
695            }
696        })?;
697
698        let mut rows = Vec::with_capacity(arr.len());
699        for element in arr {
700            let obj = element
701                .as_object()
702                .ok_or_else(|| MappingError::JsonParseError {
703                    message: "JSON array element is not an object".to_string(),
704                })?;
705            let row = Row::from_pairs(
706                obj.iter()
707                    .map(|(k, v)| (k.clone(), json_value_to_string(v))),
708            );
709            rows.push(row);
710        }
711        Ok(rows)
712    }
713}
714
715// ─── JSON helpers ─────────────────────────────────────────────────────────────
716
717fn navigate_json_path<'a>(
718    value: &'a serde_json::Value,
719    path: &str,
720) -> MappingResult<&'a serde_json::Value> {
721    let mut current = value;
722    for key in path.split('.') {
723        current = current
724            .get(key)
725            .ok_or_else(|| MappingError::JsonPathNoMatch {
726                path: path.to_string(),
727            })?;
728    }
729    Ok(current)
730}
731
732fn json_value_to_string(v: &serde_json::Value) -> String {
733    match v {
734        serde_json::Value::String(s) => s.clone(),
735        serde_json::Value::Null => String::new(),
736        serde_json::Value::Bool(b) => b.to_string(),
737        serde_json::Value::Number(n) => n.to_string(),
738        other => other.to_string(),
739    }
740}
741
742// ─── CSV helpers ──────────────────────────────────────────────────────────────
743
/// Break CSV text into logical lines.
///
/// A line ends at `\n`, `\r\n` or a lone `\r`, unless the break occurs
/// inside a double-quoted field. In that case it stays part of the current
/// line and is stored as a single `\n`. Quote characters are kept in the
/// output; every `"` simply toggles the "inside quotes" state. A trailing
/// line is emitted only if it is non-empty.
fn split_csv_lines(content: &str) -> Vec<String> {
    let mut records = Vec::new();
    let mut record = String::new();
    let mut quoted = false;
    let mut stream = content.chars().peekable();

    while let Some(c) = stream.next() {
        let is_line_break = match c {
            '\r' => {
                // Consume the LF of a CRLF pair so it counts as one break.
                let _ = stream.next_if_eq(&'\n');
                true
            }
            '\n' => true,
            _ => false,
        };

        if !is_line_break {
            if c == '"' {
                quoted = !quoted;
            }
            record.push(c);
        } else if quoted {
            // Break inside a quoted field: keep it, normalised to LF.
            record.push('\n');
        } else {
            records.push(std::mem::take(&mut record));
        }
    }

    if !record.is_empty() {
        records.push(record);
    }
    records
}
785
/// Split one logical CSV line into its field values.
///
/// A `"` outside quotes opens a quoted section and is not part of the value.
/// Inside quotes, `""` yields a literal `"` and a single `"` closes the
/// section. The delimiter only separates fields outside quotes. The result
/// always holds at least one field (an empty line gives one empty string).
fn parse_csv_line(line: &str, delimiter: char) -> Vec<String> {
    let mut fields = Vec::new();
    let mut field = String::new();
    let mut quoted = false;
    let mut stream = line.chars().peekable();

    while let Some(c) = stream.next() {
        match (quoted, c) {
            (true, '"') => {
                // A doubled quote is an escaped quote; a single one ends
                // the quoted section.
                if stream.next_if_eq(&'"').is_some() {
                    field.push('"');
                } else {
                    quoted = false;
                }
            }
            (true, other) => field.push(other),
            (false, '"') => quoted = true,
            (false, other) if other == delimiter => fields.push(std::mem::take(&mut field)),
            (false, other) => field.push(other),
        }
    }

    // Whatever is left is the last field, even when empty.
    fields.push(field);
    fields
}
817
818// ─── Builder ──────────────────────────────────────────────────────────────────
819
820/// Fluent builder for constructing [`MappingRule`] instances
821///
822/// # Example
823///
824/// ```rust
825/// use oxirs_ttl::mapping::{MappingRuleBuilder, ObjectSpec};
826///
827/// let rule = MappingRuleBuilder::new("employees")
828///     .csv_source("id,name\n1,Alice\n2,Bob")
829///     .subject_template("http://example.org/employee/{id}")
830///     .map("http://xmlns.com/foaf/0.1/name", ObjectSpec::Column("name".to_string()))
831///     .build();
832/// ```
833#[derive(Debug)]
834pub struct MappingRuleBuilder {
835    rule: MappingRule,
836}
837
838impl MappingRuleBuilder {
839    /// Start building a new rule with the given name
840    pub fn new(name: impl Into<String>) -> Self {
841        let name_str = name.into();
842        Self {
843            rule: MappingRule {
844                name: name_str,
845                source: DataSource::Csv {
846                    content: String::new(),
847                    delimiter: ',',
848                },
849                subject_template: Template::new(""),
850                predicate_object_maps: Vec::new(),
851                graph_name: None,
852            },
853        }
854    }
855
856    /// Use a CSV string as the data source (comma delimiter)
857    pub fn csv_source(mut self, content: impl Into<String>) -> Self {
858        self.rule.source = DataSource::Csv {
859            content: content.into(),
860            delimiter: ',',
861        };
862        self
863    }
864
865    /// Use a CSV string with a custom delimiter
866    pub fn csv_source_with_delimiter(
867        mut self,
868        content: impl Into<String>,
869        delimiter: char,
870    ) -> Self {
871        self.rule.source = DataSource::Csv {
872            content: content.into(),
873            delimiter,
874        };
875        self
876    }
877
878    /// Use a JSON string as the data source (root must be an array)
879    pub fn json_source(mut self, content: impl Into<String>) -> Self {
880        self.rule.source = DataSource::Json {
881            content: content.into(),
882            json_path: None,
883        };
884        self
885    }
886
887    /// Use a JSON string with a dot-separated path to the target array
888    pub fn json_source_with_path(
889        mut self,
890        content: impl Into<String>,
891        json_path: impl Into<String>,
892    ) -> Self {
893        self.rule.source = DataSource::Json {
894            content: content.into(),
895            json_path: Some(json_path.into()),
896        };
897        self
898    }
899
900    /// Use pre-parsed inline values
901    pub fn inline_source(mut self, headers: Vec<String>, rows: Vec<Vec<String>>) -> Self {
902        self.rule.source = DataSource::InlineValues { rows, headers };
903        self
904    }
905
906    /// Set the subject IRI template
907    pub fn subject_template(mut self, template: impl Into<String>) -> Self {
908        self.rule.subject_template = Template::new(template);
909        self
910    }
911
912    /// Add a predicate-object mapping
913    pub fn map(mut self, predicate: impl Into<String>, object: ObjectSpec) -> Self {
914        self.rule.predicate_object_maps.push(PredicateObjectMap {
915            predicate: predicate.into(),
916            object_template: object,
917        });
918        self
919    }
920
921    /// Assign all produced triples to a named graph
922    pub fn graph(mut self, graph_name: impl Into<String>) -> Self {
923        self.rule.graph_name = Some(graph_name.into());
924        self
925    }
926
927    /// Consume the builder and return the finished [`MappingRule`]
928    pub fn build(self) -> MappingRule {
929        self.rule
930    }
931}
932
933// ─── Tests ────────────────────────────────────────────────────────────────────
934
935#[cfg(test)]
936mod tests {
937    use super::*;
938
939    // ── helpers ──────────────────────────────────────────────────────────
940
    /// Test helper: a fresh engine built with `MappingEngine::new()`.
    fn engine() -> MappingEngine {
        MappingEngine::new()
    }
944
    /// Test helper: a fresh engine built with `MappingEngine::new_lenient()`.
    // NOTE(review): presumably the lenient mode skips failing rows instead of
    // returning an error — confirm against `MappingEngine::new_lenient`.
    fn lenient_engine() -> MappingEngine {
        MappingEngine::new_lenient()
    }
948
949    fn xsd(local: &str) -> String {
950        format!("http://www.w3.org/2001/XMLSchema#{local}")
951    }
952
953    fn ex(local: &str) -> String {
954        format!("http://example.org/{local}")
955    }
956
957    fn foaf(local: &str) -> String {
958        format!("http://xmlns.com/foaf/0.1/{local}")
959    }
960
961    // ── Template tests ───────────────────────────────────────────────────
962
963    #[test]
964    fn test_template_simple_substitution() {
965        let tpl = Template::new("http://example.org/{id}");
966        let mut row = Row::new();
967        row.values.insert("id".to_string(), "42".to_string());
968        let result = tpl.render(&row, 0).expect("should succeed");
969        assert_eq!(result, "http://example.org/42");
970    }
971
972    #[test]
973    fn test_template_multiple_placeholders() {
974        let tpl = Template::new("http://example.org/{type}/{id}");
975        let mut row = Row::new();
976        row.values.insert("type".to_string(), "person".to_string());
977        row.values.insert("id".to_string(), "7".to_string());
978        let result = tpl.render(&row, 0).expect("should succeed");
979        assert_eq!(result, "http://example.org/person/7");
980    }
981
982    #[test]
983    fn test_template_percent_encoding() {
984        let tpl = Template::new("http://example.org/{name}");
985        let mut row = Row::new();
986        row.values
987            .insert("name".to_string(), "hello world".to_string());
988        let result = tpl.render(&row, 0).expect("should succeed");
989        assert_eq!(result, "http://example.org/hello%20world");
990    }
991
992    #[test]
993    fn test_template_missing_column_error() {
994        let tpl = Template::new("http://example.org/{missing}");
995        let row = Row::new();
996        let err = tpl.render(&row, 3).unwrap_err();
997        match err {
998            MappingError::UnresolvableTemplate {
999                column, row_index, ..
1000            } => {
1001                assert_eq!(column, "missing");
1002                assert_eq!(row_index, 3);
1003            }
1004            other => panic!("unexpected error: {other}"),
1005        }
1006    }
1007
1008    #[test]
1009    fn test_template_no_placeholders() {
1010        let tpl = Template::new("http://example.org/constant");
1011        let row = Row::new();
1012        let result = tpl.render(&row, 0).expect("should succeed");
1013        assert_eq!(result, "http://example.org/constant");
1014    }
1015
1016    #[test]
1017    fn test_template_slash_encoded() {
1018        let tpl = Template::new("http://example.org/{path}");
1019        let mut row = Row::new();
1020        row.values.insert("path".to_string(), "a/b/c".to_string());
1021        let result = tpl.render(&row, 0).expect("should succeed");
1022        // '/' is not in RFC 3986 unreserved, should be encoded
1023        assert_eq!(result, "http://example.org/a%2Fb%2Fc");
1024    }
1025
1026    // ── Row tests ────────────────────────────────────────────────────────
1027
1028    #[test]
1029    fn test_row_get() {
1030        let mut row = Row::new();
1031        row.values.insert("key".to_string(), "value".to_string());
1032        assert_eq!(row.get("key"), Some("value"));
1033        assert_eq!(row.get("absent"), None);
1034    }
1035
1036    #[test]
1037    fn test_row_contains() {
1038        let row = Row::from_pairs(vec![("x".to_string(), "1".to_string())]);
1039        assert!(row.contains("x"));
1040        assert!(!row.contains("y"));
1041    }
1042
1043    #[test]
1044    fn test_row_display() {
1045        let row = Row::from_pairs(vec![("a".to_string(), "1".to_string())]);
1046        let s = format!("{row}");
1047        assert!(s.contains("a: 1"));
1048    }
1049
1050    // ── CSV parsing tests ────────────────────────────────────────────────
1051
1052    #[test]
1053    fn test_csv_basic_parse() {
1054        let csv = "id,name,age\n1,Alice,30\n2,Bob,25";
1055        let (headers, rows) = MappingEngine::parse_csv(csv, ',').expect("should succeed");
1056        assert_eq!(headers, vec!["id", "name", "age"]);
1057        assert_eq!(rows.len(), 2);
1058        assert_eq!(rows[0].get("name"), Some("Alice"));
1059        assert_eq!(rows[1].get("age"), Some("25"));
1060    }
1061
1062    #[test]
1063    fn test_csv_tab_delimiter() {
1064        let csv = "id\tvalue\n1\thello\n2\tworld";
1065        let (_headers, rows) = MappingEngine::parse_csv(csv, '\t').expect("should succeed");
1066        assert_eq!(rows.len(), 2);
1067        assert_eq!(rows[0].get("value"), Some("hello"));
1068    }
1069
1070    #[test]
1071    fn test_csv_quoted_fields() {
1072        let csv = "id,desc\n1,\"hello, world\"\n2,simple";
1073        let (_headers, rows) = MappingEngine::parse_csv(csv, ',').expect("should succeed");
1074        assert_eq!(rows[0].get("desc"), Some("hello, world"));
1075        assert_eq!(rows[1].get("desc"), Some("simple"));
1076    }
1077
1078    #[test]
1079    fn test_csv_escaped_quotes() {
1080        let csv = "id,text\n1,\"say \"\"hi\"\"\"\n";
1081        let (_headers, rows) = MappingEngine::parse_csv(csv, ',').expect("should succeed");
1082        assert_eq!(rows[0].get("text"), Some("say \"hi\""));
1083    }
1084
1085    #[test]
1086    fn test_csv_crlf_endings() {
1087        let csv = "id,name\r\n1,Alice\r\n2,Bob\r\n";
1088        let (_headers, rows) = MappingEngine::parse_csv(csv, ',').expect("should succeed");
1089        assert_eq!(rows.len(), 2);
1090        assert_eq!(rows[0].get("name"), Some("Alice"));
1091    }
1092
1093    #[test]
1094    fn test_csv_semicolon_delimiter() {
1095        let csv = "id;value\n1;alpha\n2;beta";
1096        let (_headers, rows) = MappingEngine::parse_csv(csv, ';').expect("should succeed");
1097        assert_eq!(rows[0].get("value"), Some("alpha"));
1098        assert_eq!(rows[1].get("value"), Some("beta"));
1099    }
1100
1101    #[test]
1102    fn test_csv_empty_content_returns_empty() {
1103        let (headers, rows) = MappingEngine::parse_csv("", ',').expect("should succeed");
1104        assert!(headers.is_empty());
1105        assert!(rows.is_empty());
1106    }
1107
1108    #[test]
1109    fn test_csv_field_count_mismatch_error() {
1110        let csv = "id,name\n1,Alice,extra\n";
1111        let err = MappingEngine::parse_csv(csv, ',').unwrap_err();
1112        assert!(matches!(err, MappingError::CsvParseError { .. }));
1113    }
1114
1115    #[test]
1116    fn test_csv_trailing_empty_lines_skipped() {
1117        let csv = "id,name\n1,Alice\n\n\n";
1118        let (_headers, rows) = MappingEngine::parse_csv(csv, ',').expect("should succeed");
1119        assert_eq!(rows.len(), 1);
1120    }
1121
1122    // ── JSON parsing tests ───────────────────────────────────────────────
1123
1124    #[test]
1125    fn test_json_flat_objects() {
1126        let json = r#"[{"id":"1","name":"Alice"},{"id":"2","name":"Bob"}]"#;
1127        let rows = MappingEngine::parse_json(json, None).expect("should succeed");
1128        assert_eq!(rows.len(), 2);
1129        assert_eq!(rows[0].get("name"), Some("Alice"));
1130        assert_eq!(rows[1].get("id"), Some("2"));
1131    }
1132
1133    #[test]
1134    fn test_json_nested_path() {
1135        let json = r#"{"data":{"people":[{"id":"1","name":"Alice"}]}}"#;
1136        let rows = MappingEngine::parse_json(json, Some("data.people")).expect("should succeed");
1137        assert_eq!(rows.len(), 1);
1138        assert_eq!(rows[0].get("name"), Some("Alice"));
1139    }
1140
1141    #[test]
1142    fn test_json_numeric_values_coerced() {
1143        let json = r#"[{"id":1,"score":9.5,"active":true}]"#;
1144        let rows = MappingEngine::parse_json(json, None).expect("should succeed");
1145        assert_eq!(rows[0].get("id"), Some("1"));
1146        assert_eq!(rows[0].get("score"), Some("9.5"));
1147        assert_eq!(rows[0].get("active"), Some("true"));
1148    }
1149
1150    #[test]
1151    fn test_json_null_value_becomes_empty() {
1152        let json = r#"[{"id":"1","name":null}]"#;
1153        let rows = MappingEngine::parse_json(json, None).expect("should succeed");
1154        assert_eq!(rows[0].get("name"), Some(""));
1155    }
1156
1157    #[test]
1158    fn test_json_invalid_json_error() {
1159        let err = MappingEngine::parse_json("not json", None).unwrap_err();
1160        assert!(matches!(err, MappingError::JsonParseError { .. }));
1161    }
1162
1163    #[test]
1164    fn test_json_path_no_match_error() {
1165        let json = r#"{"a":{}}"#;
1166        let err = MappingEngine::parse_json(json, Some("a.b.c")).unwrap_err();
1167        assert!(matches!(err, MappingError::JsonPathNoMatch { .. }));
1168    }
1169
1170    #[test]
1171    fn test_json_root_not_array_error() {
1172        let json = r#"{"key":"value"}"#;
1173        let err = MappingEngine::parse_json(json, None).unwrap_err();
1174        assert!(matches!(err, MappingError::JsonPathNoMatch { .. }));
1175    }
1176
1177    #[test]
1178    fn test_json_empty_array() {
1179        let json = r#"[]"#;
1180        let rows = MappingEngine::parse_json(json, None).expect("should succeed");
1181        assert!(rows.is_empty());
1182    }
1183
1184    // ── Basic CSV mapping tests ──────────────────────────────────────────
1185
1186    #[test]
1187    fn test_csv_mapping_single_predicate() {
1188        let csv = "id,name\n1,Alice";
1189        let rule = MappingRuleBuilder::new("test")
1190            .csv_source(csv)
1191            .subject_template(ex("{id}"))
1192            .map(foaf("name"), ObjectSpec::Column("name".to_string()))
1193            .build();
1194        let triples = engine().execute(&rule).expect("should succeed");
1195        assert_eq!(triples.len(), 1);
1196        let t = &triples[0];
1197        assert_eq!(t.subject().to_string(), format!("<{}>", ex("1")));
1198        assert_eq!(t.predicate().to_string(), format!("<{}>", foaf("name")));
1199        assert!(t.object().to_string().contains("Alice"));
1200    }
1201
1202    #[test]
1203    fn test_csv_mapping_two_rows_two_predicates() {
1204        let csv = "id,name,age\n1,Alice,30\n2,Bob,25";
1205        let rule = MappingRuleBuilder::new("test")
1206            .csv_source(csv)
1207            .subject_template(ex("{id}"))
1208            .map(foaf("name"), ObjectSpec::Column("name".to_string()))
1209            .map(foaf("age"), ObjectSpec::Column("age".to_string()))
1210            .build();
1211        let triples = engine().execute(&rule).expect("should succeed");
1212        assert_eq!(triples.len(), 4); // 2 rows × 2 predicates
1213    }
1214
1215    #[test]
1216    fn test_csv_mapping_typed_integer() {
1217        let csv = "id,age\n1,42";
1218        let rule = MappingRuleBuilder::new("test")
1219            .csv_source(csv)
1220            .subject_template(ex("{id}"))
1221            .map(
1222                foaf("age"),
1223                ObjectSpec::TypedColumn {
1224                    column: "age".to_string(),
1225                    datatype: xsd("integer"),
1226                },
1227            )
1228            .build();
1229        let triples = engine().execute(&rule).expect("should succeed");
1230        assert_eq!(triples.len(), 1);
1231        let obj = triples[0].object().to_string();
1232        assert!(obj.contains("42"), "object should contain 42, got: {obj}");
1233        assert!(
1234            obj.contains("integer"),
1235            "object should contain xsd:integer, got: {obj}"
1236        );
1237    }
1238
1239    #[test]
1240    fn test_csv_mapping_typed_date() {
1241        let csv = "id,dob\n1,1990-01-15";
1242        let rule = MappingRuleBuilder::new("test")
1243            .csv_source(csv)
1244            .subject_template(ex("{id}"))
1245            .map(
1246                ex("dob"),
1247                ObjectSpec::TypedColumn {
1248                    column: "dob".to_string(),
1249                    datatype: xsd("date"),
1250                },
1251            )
1252            .build();
1253        let triples = engine().execute(&rule).expect("should succeed");
1254        assert_eq!(triples.len(), 1);
1255        let obj = triples[0].object().to_string();
1256        assert!(obj.contains("1990-01-15"));
1257        assert!(obj.contains("date"));
1258    }
1259
1260    #[test]
1261    fn test_csv_mapping_constant_object() {
1262        let csv = "id\n1\n2";
1263        let rule = MappingRuleBuilder::new("test")
1264            .csv_source(csv)
1265            .subject_template(ex("{id}"))
1266            .map(
1267                "http://www.w3.org/1999/02/22-rdf-syntax-ns#type",
1268                ObjectSpec::Constant("Person".to_string()),
1269            )
1270            .build();
1271        let triples = engine().execute(&rule).expect("should succeed");
1272        assert_eq!(triples.len(), 2);
1273        for t in &triples {
1274            assert!(t.object().to_string().contains("Person"));
1275        }
1276    }
1277
1278    #[test]
1279    fn test_csv_mapping_constant_iri_object() {
1280        let csv = "id\n1";
1281        let person_class = ex("Person");
1282        let rule = MappingRuleBuilder::new("test")
1283            .csv_source(csv)
1284            .subject_template(ex("{id}"))
1285            .map(
1286                "http://www.w3.org/1999/02/22-rdf-syntax-ns#type",
1287                ObjectSpec::ConstantIri(person_class.clone()),
1288            )
1289            .build();
1290        let triples = engine().execute(&rule).expect("should succeed");
1291        assert_eq!(triples.len(), 1);
1292        let obj = triples[0].object().to_string();
1293        assert!(obj.contains(&person_class));
1294    }
1295
1296    #[test]
1297    fn test_csv_mapping_template_object() {
1298        let csv = "id,dept\n1,sales";
1299        let rule = MappingRuleBuilder::new("test")
1300            .csv_source(csv)
1301            .subject_template(ex("{id}"))
1302            .map(
1303                ex("department"),
1304                ObjectSpec::Template(Template::new("http://example.org/dept/{dept}")),
1305            )
1306            .build();
1307        let triples = engine().execute(&rule).expect("should succeed");
1308        assert_eq!(triples.len(), 1);
1309        let obj = triples[0].object().to_string();
1310        assert!(obj.contains("sales"), "got: {obj}");
1311    }
1312
1313    #[test]
1314    fn test_csv_mapping_lang_fixed() {
1315        let csv = "id,label\n1,Hello";
1316        let rule = MappingRuleBuilder::new("test")
1317            .csv_source(csv)
1318            .subject_template(ex("{id}"))
1319            .map(
1320                ex("label"),
1321                ObjectSpec::LangFixed {
1322                    column: "label".to_string(),
1323                    lang: "en".to_string(),
1324                },
1325            )
1326            .build();
1327        let triples = engine().execute(&rule).expect("should succeed");
1328        assert_eq!(triples.len(), 1);
1329        let obj = triples[0].object().to_string();
1330        assert!(obj.contains("Hello"), "got: {obj}");
1331        assert!(obj.contains("en"), "got: {obj}");
1332    }
1333
1334    #[test]
1335    fn test_csv_mapping_lang_column() {
1336        let csv = "id,label,lang\n1,Bonjour,fr";
1337        let rule = MappingRuleBuilder::new("test")
1338            .csv_source(csv)
1339            .subject_template(ex("{id}"))
1340            .map(
1341                ex("label"),
1342                ObjectSpec::LangColumn {
1343                    column: "label".to_string(),
1344                    lang_column: "lang".to_string(),
1345                },
1346            )
1347            .build();
1348        let triples = engine().execute(&rule).expect("should succeed");
1349        assert_eq!(triples.len(), 1);
1350        let obj = triples[0].object().to_string();
1351        assert!(obj.contains("Bonjour"), "got: {obj}");
1352        assert!(obj.contains("fr"), "got: {obj}");
1353    }
1354
1355    // ── Named graph tests ────────────────────────────────────────────────
1356
1357    #[test]
1358    fn test_named_graph_assignment() {
1359        let csv = "id\n1";
1360        let graph = "http://example.org/graph1";
1361        let rule = MappingRuleBuilder::new("test")
1362            .csv_source(csv)
1363            .subject_template(ex("{id}"))
1364            .map(ex("type"), ObjectSpec::Constant("X".to_string()))
1365            .graph(graph)
1366            .build();
1367        assert_eq!(rule.graph_name.as_deref(), Some(graph));
1368        // Engine still produces triples (named graph metadata is on rule)
1369        let triples = engine().execute(&rule).expect("should succeed");
1370        assert_eq!(triples.len(), 1);
1371    }
1372
1373    // ── JSON mapping tests ───────────────────────────────────────────────
1374
1375    #[test]
1376    fn test_json_mapping_flat() {
1377        let json = r#"[{"id":"1","name":"Alice"},{"id":"2","name":"Bob"}]"#;
1378        let rule = MappingRuleBuilder::new("test")
1379            .json_source(json)
1380            .subject_template(ex("{id}"))
1381            .map(foaf("name"), ObjectSpec::Column("name".to_string()))
1382            .build();
1383        let triples = engine().execute(&rule).expect("should succeed");
1384        assert_eq!(triples.len(), 2);
1385    }
1386
1387    #[test]
1388    fn test_json_mapping_nested_path() {
1389        let json = r#"{"items":[{"id":"10","val":"x"},{"id":"20","val":"y"}]}"#;
1390        let rule = MappingRuleBuilder::new("test")
1391            .json_source_with_path(json, "items")
1392            .subject_template(ex("{id}"))
1393            .map(ex("val"), ObjectSpec::Column("val".to_string()))
1394            .build();
1395        let triples = engine().execute(&rule).expect("should succeed");
1396        assert_eq!(triples.len(), 2);
1397    }
1398
1399    #[test]
1400    fn test_json_mapping_typed_integer_column() {
1401        let json = r#"[{"id":"1","count":42}]"#;
1402        let rule = MappingRuleBuilder::new("test")
1403            .json_source(json)
1404            .subject_template(ex("{id}"))
1405            .map(
1406                ex("count"),
1407                ObjectSpec::TypedColumn {
1408                    column: "count".to_string(),
1409                    datatype: xsd("integer"),
1410                },
1411            )
1412            .build();
1413        let triples = engine().execute(&rule).expect("should succeed");
1414        assert_eq!(triples.len(), 1);
1415        let obj = triples[0].object().to_string();
1416        assert!(obj.contains("42"));
1417        assert!(obj.contains("integer"));
1418    }
1419
1420    #[test]
1421    fn test_json_mapping_multi_predicates() {
1422        let json = r#"[{"id":"1","name":"Alice","age":"30","city":"NYC"}]"#;
1423        let rule = MappingRuleBuilder::new("test")
1424            .json_source(json)
1425            .subject_template(ex("{id}"))
1426            .map(foaf("name"), ObjectSpec::Column("name".to_string()))
1427            .map(foaf("age"), ObjectSpec::Column("age".to_string()))
1428            .map(ex("city"), ObjectSpec::Column("city".to_string()))
1429            .build();
1430        let triples = engine().execute(&rule).expect("should succeed");
1431        assert_eq!(triples.len(), 3);
1432    }
1433
1434    // ── Inline values tests ──────────────────────────────────────────────
1435
1436    #[test]
1437    fn test_inline_values_mapping() {
1438        let rule = MappingRuleBuilder::new("test")
1439            .inline_source(
1440                vec!["id".to_string(), "name".to_string()],
1441                vec![
1442                    vec!["1".to_string(), "Alice".to_string()],
1443                    vec!["2".to_string(), "Bob".to_string()],
1444                ],
1445            )
1446            .subject_template(ex("{id}"))
1447            .map(foaf("name"), ObjectSpec::Column("name".to_string()))
1448            .build();
1449        let triples = engine().execute(&rule).expect("should succeed");
1450        assert_eq!(triples.len(), 2);
1451    }
1452
1453    // ── Batch execution tests ────────────────────────────────────────────
1454
1455    #[test]
1456    fn test_execute_all_multiple_rules() {
1457        let csv1 = "id,name\n1,Alice";
1458        let csv2 = "id,name\n100,Bob";
1459        let rule1 = MappingRuleBuilder::new("r1")
1460            .csv_source(csv1)
1461            .subject_template(ex("{id}"))
1462            .map(foaf("name"), ObjectSpec::Column("name".to_string()))
1463            .build();
1464        let rule2 = MappingRuleBuilder::new("r2")
1465            .csv_source(csv2)
1466            .subject_template(ex("{id}"))
1467            .map(foaf("name"), ObjectSpec::Column("name".to_string()))
1468            .build();
1469        let triples = engine()
1470            .execute_all(&[rule1, rule2])
1471            .expect("should succeed");
1472        assert_eq!(triples.len(), 2);
1473    }
1474
1475    #[test]
1476    fn test_execute_all_empty_rules() {
1477        let triples = engine().execute_all(&[]).expect("should succeed");
1478        assert!(triples.is_empty());
1479    }
1480
1481    // ── Error case tests ─────────────────────────────────────────────────
1482
1483    #[test]
1484    fn test_missing_column_error() {
1485        let csv = "id\n1";
1486        let rule = MappingRuleBuilder::new("test")
1487            .csv_source(csv)
1488            .subject_template(ex("{id}"))
1489            .map(foaf("name"), ObjectSpec::Column("name".to_string()))
1490            .build();
1491        let err = engine().execute(&rule).unwrap_err();
1492        assert!(matches!(err, MappingError::MissingColumn { column, .. } if column == "name"));
1493    }
1494
1495    #[test]
1496    fn test_missing_subject_column_error() {
1497        let csv = "name\nAlice";
1498        let rule = MappingRuleBuilder::new("test")
1499            .csv_source(csv)
1500            .subject_template(ex("{missing_id}"))
1501            .map(foaf("name"), ObjectSpec::Column("name".to_string()))
1502            .build();
1503        let err = engine().execute(&rule).unwrap_err();
1504        assert!(matches!(err, MappingError::UnresolvableTemplate { .. }));
1505    }
1506
1507    #[test]
1508    fn test_lenient_engine_skips_bad_rows() {
1509        let csv = "id,name\n1,Alice\n2,Bob";
1510        // Subject template referencing column that does not exist for a "ghost" row
1511        // We test this via a bad predicate-object map with a missing column
1512        let rule = MappingRuleBuilder::new("test")
1513            .csv_source(csv)
1514            .subject_template(ex("{id}"))
1515            // "score" does not exist; lenient engine should skip those triples
1516            .map(ex("score"), ObjectSpec::Column("score".to_string()))
1517            .build();
1518        let triples = lenient_engine().execute(&rule).expect("should succeed");
1519        // Both rows fail on the score column; lenient skips them
1520        assert_eq!(triples.len(), 0);
1521    }
1522
1523    #[test]
1524    fn test_invalid_predicate_iri_error() {
1525        let csv = "id\n1";
1526        let rule = MappingRuleBuilder::new("test")
1527            .csv_source(csv)
1528            .subject_template(ex("{id}"))
1529            .map("not a valid iri", ObjectSpec::Constant("x".to_string()))
1530            .build();
1531        let err = engine().execute(&rule).unwrap_err();
1532        assert!(matches!(err, MappingError::InvalidPredicateIri { .. }));
1533    }
1534
1535    #[test]
1536    fn test_invalid_subject_iri_error() {
1537        let csv = "id\n1";
1538        let rule = MappingRuleBuilder::new("test")
1539            .csv_source(csv)
1540            // Template produces a string that may not be a valid absolute IRI
1541            .subject_template("not-an-iri/{id}")
1542            .map(foaf("name"), ObjectSpec::Constant("x".to_string()))
1543            .build();
1544        let err = engine().execute(&rule).unwrap_err();
1545        assert!(matches!(err, MappingError::InvalidIri { .. }));
1546    }
1547
1548    // ── Builder pattern tests ────────────────────────────────────────────
1549
1550    #[test]
1551    fn test_builder_chain() {
1552        let rule = MappingRuleBuilder::new("chain_test")
1553            .csv_source("id,x,y\n1,2,3")
1554            .subject_template(ex("{id}"))
1555            .map(ex("x"), ObjectSpec::Column("x".to_string()))
1556            .map(ex("y"), ObjectSpec::Column("y".to_string()))
1557            .graph("http://example.org/g1")
1558            .build();
1559        assert_eq!(rule.name, "chain_test");
1560        assert_eq!(rule.predicate_object_maps.len(), 2);
1561        assert_eq!(rule.graph_name.as_deref(), Some("http://example.org/g1"));
1562    }
1563
1564    #[test]
1565    fn test_builder_csv_with_delimiter() {
1566        let rule = MappingRuleBuilder::new("pipe")
1567            .csv_source_with_delimiter("id|name\n1|Alice", '|')
1568            .subject_template(ex("{id}"))
1569            .map(foaf("name"), ObjectSpec::Column("name".to_string()))
1570            .build();
1571        let triples = engine().execute(&rule).expect("should succeed");
1572        assert_eq!(triples.len(), 1);
1573        assert!(triples[0].object().to_string().contains("Alice"));
1574    }
1575
1576    #[test]
1577    fn test_builder_json_source_with_path() {
1578        let json = r#"{"list":[{"id":"5","v":"ok"}]}"#;
1579        let rule = MappingRuleBuilder::new("j")
1580            .json_source_with_path(json, "list")
1581            .subject_template(ex("{id}"))
1582            .map(ex("v"), ObjectSpec::Column("v".to_string()))
1583            .build();
1584        let triples = engine().execute(&rule).expect("should succeed");
1585        assert_eq!(triples.len(), 1);
1586        assert!(triples[0].object().to_string().contains("ok"));
1587    }
1588
1589    // ── IRI generation tests ─────────────────────────────────────────────
1590
1591    #[test]
1592    fn test_iri_from_column_value() {
1593        let csv = "id,related_id\n1,99";
1594        let rule = MappingRuleBuilder::new("test")
1595            .csv_source(csv)
1596            .subject_template(ex("{id}"))
1597            .map(
1598                ex("related"),
1599                ObjectSpec::Template(Template::new("http://example.org/item/{related_id}")),
1600            )
1601            .build();
1602        let triples = engine().execute(&rule).expect("should succeed");
1603        assert_eq!(triples.len(), 1);
1604        let obj = triples[0].object().to_string();
1605        assert!(obj.contains("99"), "got: {obj}");
1606    }
1607
1608    #[test]
1609    fn test_iri_generation_with_special_chars() {
1610        let csv = "id\nhello world";
1611        let rule = MappingRuleBuilder::new("test")
1612            .csv_source(csv)
1613            .subject_template(ex("{id}"))
1614            .map(ex("self"), ObjectSpec::ConstantIri(ex("x")))
1615            .build();
1616        let triples = engine().execute(&rule).expect("should succeed");
1617        assert_eq!(triples.len(), 1);
1618        // Subject should have space encoded as %20
1619        let subj = triples[0].subject().to_string();
1620        assert!(subj.contains("%20"), "got: {subj}");
1621    }
1622
1623    // ── Multiple rules interaction tests ─────────────────────────────────
1624
1625    #[test]
1626    fn test_multiple_rules_different_sources() {
1627        let csv = "id,label\n1,CSV-item";
1628        let json = r#"[{"id":"2","label":"JSON-item"}]"#;
1629        let rule_csv = MappingRuleBuilder::new("r_csv")
1630            .csv_source(csv)
1631            .subject_template(ex("{id}"))
1632            .map(ex("label"), ObjectSpec::Column("label".to_string()))
1633            .build();
1634        let rule_json = MappingRuleBuilder::new("r_json")
1635            .json_source(json)
1636            .subject_template(ex("{id}"))
1637            .map(ex("label"), ObjectSpec::Column("label".to_string()))
1638            .build();
1639        let triples = engine()
1640            .execute_all(&[rule_csv, rule_json])
1641            .expect("should succeed");
1642        assert_eq!(triples.len(), 2);
1643    }
1644
1645    // ── Typed literal tests ──────────────────────────────────────────────
1646
1647    #[test]
1648    fn test_typed_literal_float() {
1649        let csv = "id,score\n1,3.14";
1650        let rule = MappingRuleBuilder::new("test")
1651            .csv_source(csv)
1652            .subject_template(ex("{id}"))
1653            .map(
1654                ex("score"),
1655                ObjectSpec::TypedColumn {
1656                    column: "score".to_string(),
1657                    datatype: xsd("decimal"),
1658                },
1659            )
1660            .build();
1661        let triples = engine().execute(&rule).expect("should succeed");
1662        assert_eq!(triples.len(), 1);
1663        let obj = triples[0].object().to_string();
1664        assert!(obj.contains("3.14"));
1665        assert!(obj.contains("decimal"));
1666    }
1667
1668    #[test]
1669    fn test_typed_literal_boolean() {
1670        let csv = "id,active\n1,true";
1671        let rule = MappingRuleBuilder::new("test")
1672            .csv_source(csv)
1673            .subject_template(ex("{id}"))
1674            .map(
1675                ex("active"),
1676                ObjectSpec::TypedColumn {
1677                    column: "active".to_string(),
1678                    datatype: xsd("boolean"),
1679                },
1680            )
1681            .build();
1682        let triples = engine().execute(&rule).expect("should succeed");
1683        assert_eq!(triples.len(), 1);
1684        let obj = triples[0].object().to_string();
1685        assert!(obj.contains("true"));
1686        assert!(obj.contains("boolean"));
1687    }
1688
1689    // ── Edge case tests ──────────────────────────────────────────────────
1690
1691    #[test]
1692    fn test_empty_csv_produces_no_triples() {
1693        let rule = MappingRuleBuilder::new("empty")
1694            .csv_source("")
1695            .subject_template(ex("{id}"))
1696            .build();
1697        let triples = engine().execute(&rule).expect("should succeed");
1698        assert!(triples.is_empty());
1699    }
1700
1701    #[test]
1702    fn test_csv_only_header_produces_no_triples() {
1703        let rule = MappingRuleBuilder::new("header-only")
1704            .csv_source("id,name")
1705            .subject_template(ex("{id}"))
1706            .map(foaf("name"), ObjectSpec::Column("name".to_string()))
1707            .build();
1708        let triples = engine().execute(&rule).expect("should succeed");
1709        assert!(triples.is_empty());
1710    }
1711
1712    #[test]
1713    fn test_json_array_empty_produces_no_triples() {
1714        let rule = MappingRuleBuilder::new("empty-json")
1715            .json_source("[]")
1716            .subject_template(ex("{id}"))
1717            .build();
1718        let triples = engine().execute(&rule).expect("should succeed");
1719        assert!(triples.is_empty());
1720    }
1721
1722    #[test]
1723    fn test_no_predicate_object_maps_produces_no_triples() {
1724        let csv = "id\n1\n2";
1725        let rule = MappingRuleBuilder::new("no-pom")
1726            .csv_source(csv)
1727            .subject_template(ex("{id}"))
1728            .build();
1729        let triples = engine().execute(&rule).expect("should succeed");
1730        assert!(triples.is_empty());
1731    }
1732
1733    #[test]
1734    fn test_percent_encode_unicode() {
1735        let tpl = Template::new("http://example.org/{name}");
1736        let mut row = Row::new();
1737        row.values
1738            .insert("name".to_string(), "こんにちは".to_string());
1739        let result = tpl.render(&row, 0).expect("should succeed");
1740        // Should be percent-encoded
1741        assert!(result.starts_with("http://example.org/%"));
1742        assert!(!result.contains("こんにちは"));
1743    }
1744
1745    #[test]
1746    fn test_csv_mapping_pipe_delimiter_multi_row() {
1747        let csv = "id|label\n10|alpha\n20|beta\n30|gamma";
1748        let rule = MappingRuleBuilder::new("pipe-multi")
1749            .csv_source_with_delimiter(csv, '|')
1750            .subject_template(ex("{id}"))
1751            .map(ex("label"), ObjectSpec::Column("label".to_string()))
1752            .build();
1753        let triples = engine().execute(&rule).expect("should succeed");
1754        assert_eq!(triples.len(), 3);
1755    }
1756
1757    #[test]
1758    fn test_mapping_engine_default() {
1759        let engine = MappingEngine::default();
1760        assert!(!engine.skip_errors);
1761    }
1762
1763    #[test]
1764    fn test_mapping_rule_add_pom() {
1765        let mut rule = MappingRule::new(
1766            "r",
1767            DataSource::Csv {
1768                content: "id\n1".to_string(),
1769                delimiter: ',',
1770            },
1771            Template::new(ex("{id}")),
1772        );
1773        assert!(rule.predicate_object_maps.is_empty());
1774        rule.add_predicate_object_map(PredicateObjectMap::new(
1775            ex("p"),
1776            ObjectSpec::Constant("v".to_string()),
1777        ));
1778        assert_eq!(rule.predicate_object_maps.len(), 1);
1779    }
1780
1781    #[test]
1782    fn test_predicate_object_map_construction() {
1783        let pom = PredicateObjectMap::new(
1784            "http://example.org/pred",
1785            ObjectSpec::Column("col".to_string()),
1786        );
1787        assert_eq!(pom.predicate, "http://example.org/pred");
1788    }
1789
1790    #[test]
1791    fn test_row_from_pairs() {
1792        let row = Row::from_pairs(vec![
1793            ("a".to_string(), "1".to_string()),
1794            ("b".to_string(), "2".to_string()),
1795        ]);
1796        assert_eq!(row.get("a"), Some("1"));
1797        assert_eq!(row.get("b"), Some("2"));
1798    }
1799
1800    #[test]
1801    fn test_json_deeply_nested_path() {
1802        let json = r#"{"a":{"b":{"c":[{"id":"1","name":"deep"}]}}}"#;
1803        let rows = MappingEngine::parse_json(json, Some("a.b.c")).expect("should succeed");
1804        assert_eq!(rows.len(), 1);
1805        assert_eq!(rows[0].get("name"), Some("deep"));
1806    }
1807
1808    #[test]
1809    fn test_csv_quoted_field_with_newline() {
1810        let csv = "id,desc\n1,\"line1\nline2\"\n2,simple";
1811        let (_headers, rows) = MappingEngine::parse_csv(csv, ',').expect("should succeed");
1812        assert_eq!(rows.len(), 2);
1813        assert!(rows[0].get("desc").expect("should succeed").contains('\n'));
1814        assert_eq!(rows[1].get("desc"), Some("simple"));
1815    }
1816
1817    #[test]
1818    fn test_template_display() {
1819        let tpl = Template::new("http://example.org/{id}");
1820        assert_eq!(tpl.to_string(), "http://example.org/{id}");
1821    }
1822
1823    #[test]
1824    fn test_row_iter() {
1825        let row = Row::from_pairs(vec![
1826            ("x".to_string(), "1".to_string()),
1827            ("y".to_string(), "2".to_string()),
1828        ]);
1829        let count = row.iter().count();
1830        assert_eq!(count, 2);
1831    }
1832}