Skip to main content

alizarin_core/
csv_model_loader.rs

//! CSV Model Loader
//!
//! Parses a 3-CSV format (graph.csv, nodes.csv, collections.csv) into
//! [`GraphInstruction`]s and [`SkosCollection`]s, suitable for building
//! an Arches resource model via the existing graph mutator pipeline.
//!
//! ## CSV Format
//!
//! ### graph.csv (single data row)
//! `name,ontology_class,author,description,is_resource`
//!
//! ### nodes.csv
//! `parent_alias,alias,name,datatype,cardinality,ontology_class,parent_property,description,collection_name,required,searchable,exportable,sortorder`
//!
//! ### collections.csv
//! `collection_name,concept_label,parent_label,sort_order`
//!
//! ## Example
//! ```rust,ignore
//! use alizarin_core::csv_model_loader::{parse_model_csvs, build_graph_from_model_csvs};
//!
//! let result = build_graph_from_model_csvs(
//!     graph_csv,
//!     nodes_csv,
//!     Some(collections_csv),
//!     rdm_namespace,
//!     Default::default(),
//! );
//! let (graph, collections) = result.unwrap();
//! ```
25
26use std::collections::{HashMap, HashSet};
27
28use uuid::Uuid;
29
30use crate::graph_mutator::{GraphInstruction, MutatorOptions};
31use crate::rdm_namespace::{
32    generate_collection_uuid, generate_concept_uuid, generate_value_uuid, parse_rdm_namespace,
33};
34use crate::skos::{SkosCollection, SkosConcept, SkosNodeType, SkosValue};
35use crate::StaticGraph;
36
37/// Severity level for a diagnostic
38#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
39pub enum DiagnosticLevel {
40    Error,
41    Warning,
42}
43
44/// A single validation diagnostic
45#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
46pub struct CsvModelDiagnostic {
47    pub level: DiagnosticLevel,
48    pub file: String,
49    pub line: Option<usize>,
50    pub message: String,
51}
52
53impl std::fmt::Display for CsvModelDiagnostic {
54    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
55        let level = match self.level {
56            DiagnosticLevel::Error => "ERROR",
57            DiagnosticLevel::Warning => "WARN",
58        };
59        if let Some(line) = self.line {
60            write!(f, "[{}] {}:{}: {}", level, self.file, line, self.message)
61        } else {
62            write!(f, "[{}] {}: {}", level, self.file, self.message)
63        }
64    }
65}
66
67/// Error type for CSV model loading
68#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
69pub struct CsvModelError {
70    pub diagnostics: Vec<CsvModelDiagnostic>,
71}
72
73impl std::fmt::Display for CsvModelError {
74    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
75        for d in &self.diagnostics {
76            writeln!(f, "{}", d)?;
77        }
78        Ok(())
79    }
80}
81
82impl std::error::Error for CsvModelError {}
83
84/// Parsed row from graph.csv
85#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
86pub struct GraphRow {
87    pub name: String,
88    pub ontology_class: Option<String>,
89    pub author: Option<String>,
90    pub description: Option<String>,
91    pub is_resource: Option<bool>,
92}
93
94/// Parsed row from nodes.csv
95#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
96pub struct NodeRow {
97    pub parent_alias: Option<String>,
98    pub alias: String,
99    pub name: String,
100    pub datatype: String,
101    pub cardinality: String,
102    pub ontology_class: String,
103    pub parent_property: String,
104    pub description: Option<String>,
105    pub collection_name: Option<String>,
106    pub required: Option<bool>,
107    pub searchable: Option<bool>,
108    pub exportable: Option<bool>,
109    pub sortorder: Option<i32>,
110}
111
112/// Parsed row from collections.csv
113#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
114pub struct CollectionRow {
115    pub collection_name: String,
116    pub concept_label: String,
117    pub parent_label: Option<String>,
118    pub sort_order: Option<i32>,
119}
120
121/// Result of parsing the 3-CSV bundle
122#[derive(Debug, Clone)]
123pub struct ModelCsvBundle {
124    pub graph: GraphRow,
125    pub nodes: Vec<NodeRow>,
126    pub collections: Vec<CollectionRow>,
127}
128
/// Datatype names accepted in the `datatype` column of nodes.csv.
/// Any other value is reported as an error during validation.
const VALID_DATATYPES: &[&str] = &[
    "semantic",
    "string",
    "concept",
    "concept-list",
    "number",
    "date",
    "boolean",
    "geojson-feature-collection",
    "domain-value",
    "domain-value-list",
    "file-list",
    "resource-instance",
    "resource-instance-list",
];
144
/// URI prefix shared by CIDOC-CRM classes and properties; URIs outside this
/// prefix only trigger warnings during validation.
const CRM_PREFIX: &str = "http://www.cidoc-crm.org/cidoc-crm/";
146
/// Break an `ontology_class` cell into its individual URIs.
///
/// The cell may list several classes separated by `|`, which lets one model
/// draw classes from more than one ontology. Each entry is trimmed and
/// entries that end up empty are dropped, so an empty or all-separator cell
/// yields an empty vector.
fn split_class_cell(raw: &str) -> Vec<String> {
    let mut classes = Vec::new();
    for piece in raw.split('|') {
        let uri = piece.trim();
        if !uri.is_empty() {
            classes.push(uri.to_owned());
        }
    }
    classes
}
157
158fn get_field<'a>(
159    record: &'a csv::StringRecord,
160    headers: &csv::StringRecord,
161    name: &str,
162) -> Option<&'a str> {
163    headers
164        .iter()
165        .position(|h| h == name)
166        .and_then(|i| record.get(i))
167        .map(|s| s.trim())
168        .filter(|s| !s.is_empty())
169}
170
171fn get_field_required<'a>(
172    record: &'a csv::StringRecord,
173    headers: &csv::StringRecord,
174    name: &str,
175    file: &str,
176    line: usize,
177    diagnostics: &mut Vec<CsvModelDiagnostic>,
178) -> Option<&'a str> {
179    match get_field(record, headers, name) {
180        Some(v) => Some(v),
181        None => {
182            diagnostics.push(CsvModelDiagnostic {
183                level: DiagnosticLevel::Error,
184                file: file.to_string(),
185                line: Some(line),
186                message: format!("missing required field \"{}\"", name),
187            });
188            None
189        }
190    }
191}
192
193/// Parse the three CSV strings into a [`ModelCsvBundle`].
194///
195/// Returns diagnostics for parse errors. If any error-level diagnostics
196/// are present, the bundle may be incomplete but is still returned for
197/// reporting purposes.
198pub fn parse_model_csvs(
199    graph_csv: &str,
200    nodes_csv: &str,
201    collections_csv: Option<&str>,
202) -> Result<(ModelCsvBundle, Vec<CsvModelDiagnostic>), CsvModelError> {
203    let mut diagnostics = Vec::new();
204
205    // --- graph.csv ---
206    let graph = parse_graph_csv(graph_csv, &mut diagnostics)?;
207
208    // --- nodes.csv ---
209    let nodes = parse_nodes_csv(nodes_csv, &mut diagnostics);
210
211    // --- collections.csv ---
212    let collections = if let Some(csv) = collections_csv {
213        parse_collections_csv(csv, &mut diagnostics)
214    } else {
215        Vec::new()
216    };
217
218    let bundle = ModelCsvBundle {
219        graph,
220        nodes,
221        collections,
222    };
223    Ok((bundle, diagnostics))
224}
225
226fn parse_graph_csv(
227    csv_text: &str,
228    diagnostics: &mut Vec<CsvModelDiagnostic>,
229) -> Result<GraphRow, CsvModelError> {
230    let mut reader = csv::Reader::from_reader(csv_text.as_bytes());
231    let headers = reader
232        .headers()
233        .map_err(|e| CsvModelError {
234            diagnostics: vec![CsvModelDiagnostic {
235                level: DiagnosticLevel::Error,
236                file: "graph.csv".to_string(),
237                line: Some(1),
238                message: format!("failed to parse headers: {}", e),
239            }],
240        })?
241        .clone();
242
243    let record = reader
244        .records()
245        .next()
246        .ok_or_else(|| CsvModelError {
247            diagnostics: vec![CsvModelDiagnostic {
248                level: DiagnosticLevel::Error,
249                file: "graph.csv".to_string(),
250                line: None,
251                message: "expected exactly 1 data row".to_string(),
252            }],
253        })?
254        .map_err(|e| CsvModelError {
255            diagnostics: vec![CsvModelDiagnostic {
256                level: DiagnosticLevel::Error,
257                file: "graph.csv".to_string(),
258                line: Some(2),
259                message: format!("failed to parse row: {}", e),
260            }],
261        })?;
262
263    let name = get_field_required(&record, &headers, "name", "graph.csv", 2, diagnostics)
264        .unwrap_or("")
265        .to_string();
266
267    if name.is_empty() {
268        return Err(CsvModelError {
269            diagnostics: vec![CsvModelDiagnostic {
270                level: DiagnosticLevel::Error,
271                file: "graph.csv".to_string(),
272                line: Some(2),
273                message: "\"name\" is required".to_string(),
274            }],
275        });
276    }
277
278    Ok(GraphRow {
279        name,
280        ontology_class: get_field(&record, &headers, "ontology_class").map(String::from),
281        author: get_field(&record, &headers, "author").map(String::from),
282        description: get_field(&record, &headers, "description").map(String::from),
283        is_resource: get_field(&record, &headers, "is_resource").map(|v| v == "true"),
284    })
285}
286
287fn parse_nodes_csv(csv_text: &str, diagnostics: &mut Vec<CsvModelDiagnostic>) -> Vec<NodeRow> {
288    let mut reader = csv::Reader::from_reader(csv_text.as_bytes());
289    let headers = match reader.headers() {
290        Ok(h) => h.clone(),
291        Err(e) => {
292            diagnostics.push(CsvModelDiagnostic {
293                level: DiagnosticLevel::Error,
294                file: "nodes.csv".to_string(),
295                line: Some(1),
296                message: format!("failed to parse headers: {}", e),
297            });
298            return Vec::new();
299        }
300    };
301
302    let mut rows = Vec::new();
303    for (i, result) in reader.records().enumerate() {
304        let line = i + 2;
305        let record = match result {
306            Ok(r) => r,
307            Err(e) => {
308                diagnostics.push(CsvModelDiagnostic {
309                    level: DiagnosticLevel::Error,
310                    file: "nodes.csv".to_string(),
311                    line: Some(line),
312                    message: format!("failed to parse row: {}", e),
313                });
314                continue;
315            }
316        };
317
318        let alias = get_field_required(&record, &headers, "alias", "nodes.csv", line, diagnostics)
319            .unwrap_or("")
320            .to_string();
321        let name = get_field_required(&record, &headers, "name", "nodes.csv", line, diagnostics)
322            .unwrap_or("")
323            .to_string();
324        let datatype = get_field_required(
325            &record,
326            &headers,
327            "datatype",
328            "nodes.csv",
329            line,
330            diagnostics,
331        )
332        .unwrap_or("")
333        .to_string();
334        let cardinality = get_field_required(
335            &record,
336            &headers,
337            "cardinality",
338            "nodes.csv",
339            line,
340            diagnostics,
341        )
342        .unwrap_or("1")
343        .to_string();
344        let ontology_class = get_field_required(
345            &record,
346            &headers,
347            "ontology_class",
348            "nodes.csv",
349            line,
350            diagnostics,
351        )
352        .unwrap_or("")
353        .to_string();
354        let parent_property = get_field_required(
355            &record,
356            &headers,
357            "parent_property",
358            "nodes.csv",
359            line,
360            diagnostics,
361        )
362        .unwrap_or("")
363        .to_string();
364
365        if alias.is_empty() {
366            continue; // already reported
367        }
368
369        rows.push(NodeRow {
370            parent_alias: get_field(&record, &headers, "parent_alias").map(String::from),
371            alias,
372            name,
373            datatype,
374            cardinality,
375            ontology_class,
376            parent_property,
377            description: get_field(&record, &headers, "description").map(String::from),
378            collection_name: get_field(&record, &headers, "collection_name").map(String::from),
379            required: get_field(&record, &headers, "required").map(|v| v == "true"),
380            searchable: get_field(&record, &headers, "searchable").map(|v| v != "false"),
381            exportable: get_field(&record, &headers, "exportable").map(|v| v == "true"),
382            sortorder: get_field(&record, &headers, "sortorder").and_then(|v| v.parse().ok()),
383        });
384    }
385    rows
386}
387
388fn parse_collections_csv(
389    csv_text: &str,
390    diagnostics: &mut Vec<CsvModelDiagnostic>,
391) -> Vec<CollectionRow> {
392    let mut reader = csv::Reader::from_reader(csv_text.as_bytes());
393    let headers = match reader.headers() {
394        Ok(h) => h.clone(),
395        Err(e) => {
396            diagnostics.push(CsvModelDiagnostic {
397                level: DiagnosticLevel::Error,
398                file: "collections.csv".to_string(),
399                line: Some(1),
400                message: format!("failed to parse headers: {}", e),
401            });
402            return Vec::new();
403        }
404    };
405
406    let mut rows = Vec::new();
407    for (i, result) in reader.records().enumerate() {
408        let line = i + 2;
409        let record = match result {
410            Ok(r) => r,
411            Err(e) => {
412                diagnostics.push(CsvModelDiagnostic {
413                    level: DiagnosticLevel::Error,
414                    file: "collections.csv".to_string(),
415                    line: Some(line),
416                    message: format!("failed to parse row: {}", e),
417                });
418                continue;
419            }
420        };
421
422        let collection_name = get_field_required(
423            &record,
424            &headers,
425            "collection_name",
426            "collections.csv",
427            line,
428            diagnostics,
429        )
430        .unwrap_or("")
431        .to_string();
432        let concept_label = get_field_required(
433            &record,
434            &headers,
435            "concept_label",
436            "collections.csv",
437            line,
438            diagnostics,
439        )
440        .unwrap_or("")
441        .to_string();
442
443        if collection_name.is_empty() || concept_label.is_empty() {
444            continue;
445        }
446
447        rows.push(CollectionRow {
448            collection_name,
449            concept_label,
450            parent_label: get_field(&record, &headers, "parent_label").map(String::from),
451            sort_order: get_field(&record, &headers, "sort_order").and_then(|v| v.parse().ok()),
452        });
453    }
454    rows
455}
456
457/// Validate a parsed [`ModelCsvBundle`] without building anything.
458///
459/// Returns all diagnostics (errors and warnings). Useful for
460/// checking CSV output from the arches-model skill.
461pub fn validate_model_csvs(bundle: &ModelCsvBundle) -> Vec<CsvModelDiagnostic> {
462    let mut diagnostics = Vec::new();
463
464    // --- Validate nodes ---
465    let mut aliases: HashSet<&str> = HashSet::new();
466    let mut parent_aliases: HashSet<&str> = HashSet::new();
467    let mut collection_refs: HashSet<&str> = HashSet::new();
468
469    for (i, node) in bundle.nodes.iter().enumerate() {
470        let line = i + 2;
471
472        // Unique alias
473        if !aliases.insert(&node.alias) {
474            diagnostics.push(CsvModelDiagnostic {
475                level: DiagnosticLevel::Error,
476                file: "nodes.csv".to_string(),
477                line: Some(line),
478                message: format!("duplicate alias \"{}\"", node.alias),
479            });
480        }
481
482        // Valid datatype
483        if !VALID_DATATYPES.contains(&node.datatype.as_str()) {
484            diagnostics.push(CsvModelDiagnostic {
485                level: DiagnosticLevel::Error,
486                file: "nodes.csv".to_string(),
487                line: Some(line),
488                message: format!("invalid datatype \"{}\"", node.datatype),
489            });
490        }
491
492        // Valid cardinality
493        if node.cardinality != "1" && node.cardinality != "n" {
494            diagnostics.push(CsvModelDiagnostic {
495                level: DiagnosticLevel::Error,
496                file: "nodes.csv".to_string(),
497                line: Some(line),
498                message: format!(
499                    "invalid cardinality \"{}\" (must be \"1\" or \"n\")",
500                    node.cardinality
501                ),
502            });
503        }
504
505        // Ontology URI prefix — warn per class if any are outside CIDOC-CRM.
506        // Multi-class cells use `|` as separator so each entry is checked
507        // independently. This stays a warning so non-CRM ontologies are allowed.
508        for class in split_class_cell(&node.ontology_class) {
509            if !class.starts_with(CRM_PREFIX) {
510                diagnostics.push(CsvModelDiagnostic {
511                    level: DiagnosticLevel::Warning,
512                    file: "nodes.csv".to_string(),
513                    line: Some(line),
514                    message: format!("ontology_class \"{}\" does not use CIDOC-CRM prefix", class),
515                });
516            }
517        }
518        if !node.parent_property.starts_with(CRM_PREFIX) && !node.parent_property.is_empty() {
519            diagnostics.push(CsvModelDiagnostic {
520                level: DiagnosticLevel::Warning,
521                file: "nodes.csv".to_string(),
522                line: Some(line),
523                message: format!(
524                    "parent_property \"{}\" does not use CIDOC-CRM prefix",
525                    node.parent_property
526                ),
527            });
528        }
529
530        // Concept nodes must have collection_name
531        if (node.datatype == "concept" || node.datatype == "concept-list")
532            && node.collection_name.is_none()
533        {
534            diagnostics.push(CsvModelDiagnostic {
535                level: DiagnosticLevel::Warning,
536                file: "nodes.csv".to_string(),
537                line: Some(line),
538                message: format!("concept node \"{}\" has no collection_name", node.alias),
539            });
540        }
541
542        if let Some(ref cn) = node.collection_name {
543            collection_refs.insert(cn.as_str());
544        }
545
546        if let Some(ref pa) = node.parent_alias {
547            parent_aliases.insert(pa.as_str());
548        }
549    }
550
551    // Dangling parent_alias references
552    for (i, node) in bundle.nodes.iter().enumerate() {
553        if let Some(ref pa) = node.parent_alias {
554            if !aliases.contains(pa.as_str()) {
555                diagnostics.push(CsvModelDiagnostic {
556                    level: DiagnosticLevel::Error,
557                    file: "nodes.csv".to_string(),
558                    line: Some(i + 2),
559                    message: format!("parent_alias \"{}\" not found in defined aliases", pa),
560                });
561            }
562        }
563    }
564
565    // Semantic nodes should have children
566    for node in &bundle.nodes {
567        if node.datatype == "semantic" && !parent_aliases.contains(node.alias.as_str()) {
568            diagnostics.push(CsvModelDiagnostic {
569                level: DiagnosticLevel::Warning,
570                file: "nodes.csv".to_string(),
571                line: None,
572                message: format!("semantic node \"{}\" has no children", node.alias),
573            });
574        }
575    }
576
577    // --- Validate collections ---
578    let mut collection_names: HashSet<&str> = HashSet::new();
579    let mut concepts_by_collection: HashMap<&str, HashSet<&str>> = HashMap::new();
580
581    for (i, row) in bundle.collections.iter().enumerate() {
582        let line = i + 2;
583        collection_names.insert(&row.collection_name);
584
585        let labels = concepts_by_collection
586            .entry(&row.collection_name)
587            .or_default();
588        labels.insert(&row.concept_label);
589
590        // Check parent_label references within same collection
591        if let Some(ref parent) = row.parent_label {
592            if !labels.contains(parent.as_str()) {
593                // Parent may appear later in file, but we can still flag forward refs
594                // that never resolve. We'll do a second pass below.
595            }
596            // Check for self-reference
597            if parent == &row.concept_label {
598                diagnostics.push(CsvModelDiagnostic {
599                    level: DiagnosticLevel::Error,
600                    file: "collections.csv".to_string(),
601                    line: Some(line),
602                    message: format!(
603                        "concept \"{}\" references itself as parent",
604                        row.concept_label
605                    ),
606                });
607            }
608        }
609    }
610
611    // Second pass: check parent_label references resolve within collection
612    for (i, row) in bundle.collections.iter().enumerate() {
613        if let Some(ref parent) = row.parent_label {
614            if let Some(labels) = concepts_by_collection.get(row.collection_name.as_str()) {
615                if !labels.contains(parent.as_str()) {
616                    diagnostics.push(CsvModelDiagnostic {
617                        level: DiagnosticLevel::Error,
618                        file: "collections.csv".to_string(),
619                        line: Some(i + 2),
620                        message: format!(
621                            "parent_label \"{}\" not found in collection \"{}\"",
622                            parent, row.collection_name
623                        ),
624                    });
625                }
626            }
627        }
628    }
629
630    // Cross-reference: nodes.csv collection_name → collections.csv.
631    //
632    // Warning (not Error) because many Arches projects load concept collections from
633    // external SKOS XML at RDM-load time rather than declaring them inline in the
634    // model CSVs. In that workflow a node can legitimately reference a collection by
635    // name without a matching row here; the real rdmCollection UUID is resolved at
636    // Arches startup from the SKOS import, not from this CSV.
637    for cn in &collection_refs {
638        if !collection_names.contains(cn) {
639            diagnostics.push(CsvModelDiagnostic {
640                level: DiagnosticLevel::Warning,
641                file: "nodes.csv".to_string(),
642                line: None,
643                message: format!(
644                    "references collection \"{}\" but it is not defined in collections.csv (expected if loaded from external SKOS)",
645                    cn
646                ),
647            });
648        }
649    }
650
651    // Graph-level ontology check — warn per class, allow pipe-separated lists.
652    if let Some(ref oc) = bundle.graph.ontology_class {
653        for class in split_class_cell(oc) {
654            if !class.starts_with(CRM_PREFIX) {
655                diagnostics.push(CsvModelDiagnostic {
656                    level: DiagnosticLevel::Warning,
657                    file: "graph.csv".to_string(),
658                    line: None,
659                    message: format!("ontology_class \"{}\" does not use CIDOC-CRM prefix", class),
660                });
661            }
662        }
663    }
664
665    diagnostics
666}
667
668/// Convert a [`ModelCsvBundle`] to [`GraphInstruction`]s.
669///
670/// The returned instructions start with `create_model` and are followed
671/// by `add_node` instructions in topological order (parents before children).
672///
673/// # Arguments
674/// * `bundle` - Parsed CSV bundle
675/// * `rdm_namespace` - RDM namespace string (UUID or URL), used for deterministic
676///   collection/concept ID generation. Must match the namespace configured in
677///   the target Arches instance.
678pub fn model_csvs_to_instructions(
679    bundle: &ModelCsvBundle,
680    rdm_namespace: &str,
681) -> Result<Vec<GraphInstruction>, CsvModelError> {
682    let ns = parse_rdm_namespace(rdm_namespace).map_err(|e| CsvModelError {
683        diagnostics: vec![CsvModelDiagnostic {
684            level: DiagnosticLevel::Error,
685            file: "(namespace)".to_string(),
686            line: None,
687            message: e,
688        }],
689    })?;
690
691    let mut instructions = Vec::new();
692
693    // Slugify graph name for root alias
694    let root_alias = crate::graph_mutator::slugify(&bundle.graph.name);
695
696    // create_model instruction
697    let mut create = GraphInstruction::new("create_model", &root_alias, "");
698    create = create.with_str("name", &bundle.graph.name);
699    if let Some(ref oc) = bundle.graph.ontology_class {
700        let classes = split_class_cell(oc);
701        if classes.len() == 1 {
702            create = create.with_str("ontology_class", &classes[0]);
703        } else if !classes.is_empty() {
704            create = create.with_param(
705                "ontology_class",
706                serde_json::Value::Array(
707                    classes.into_iter().map(serde_json::Value::String).collect(),
708                ),
709            );
710        }
711    }
712    instructions.push(create);
713
714    // Build collection name→id map for concept nodes
715    let collection_ids = build_collection_id_map(bundle, &ns);
716
717    // Topologically sort nodes (parents before children)
718    let sorted = topological_sort(&bundle.nodes);
719
720    // add_node instructions
721    for node in &sorted {
722        let subject = match &node.parent_alias {
723            Some(pa) => pa.as_str(),
724            None => root_alias.as_str(),
725        };
726
727        let mut instr = GraphInstruction::new("add_node", subject, &node.alias);
728        instr = instr.with_str("name", &node.name);
729        instr = instr.with_str("datatype", &node.datatype);
730        instr = instr.with_str("cardinality", &node.cardinality);
731        // Multi-class cells are pipe-separated; pass as array when more than one.
732        let classes = split_class_cell(&node.ontology_class);
733        if classes.len() == 1 {
734            instr = instr.with_str("ontology_class", &classes[0]);
735        } else if !classes.is_empty() {
736            instr = instr.with_param(
737                "ontology_class",
738                serde_json::Value::Array(
739                    classes.into_iter().map(serde_json::Value::String).collect(),
740                ),
741            );
742        }
743        instr = instr.with_str("parent_property", &node.parent_property);
744
745        if let Some(ref desc) = node.description {
746            instr = instr.with_str("description", desc);
747        }
748        if let Some(req) = node.required {
749            instr = instr.with_param("isrequired", serde_json::Value::Bool(req));
750        }
751        if let Some(search) = node.searchable {
752            instr = instr.with_param("issearchable", serde_json::Value::Bool(search));
753        }
754        if let Some(exp) = node.exportable {
755            instr = instr.with_param("exportable", serde_json::Value::Bool(exp));
756        }
757        if let Some(so) = node.sortorder {
758            instr = instr.with_param("sortorder", serde_json::Value::Number(so.into()));
759        }
760
761        // For concept nodes, attach collection ID as config
762        if node.datatype == "concept" || node.datatype == "concept-list" {
763            if let Some(ref cn) = node.collection_name {
764                if let Some(cid) = collection_ids.get(cn.as_str()) {
765                    let config = serde_json::json!({ "rdmCollection": cid });
766                    instr = instr.with_param("config", config);
767                }
768            }
769        }
770
771        instructions.push(instr);
772    }
773
774    Ok(instructions)
775}
776
777/// Build [`SkosCollection`]s from the collections rows in a [`ModelCsvBundle`].
778///
779/// # Arguments
780/// * `bundle` - Parsed CSV bundle
781/// * `rdm_namespace` - RDM namespace string (UUID or URL)
782pub fn model_csvs_to_collections(
783    bundle: &ModelCsvBundle,
784    rdm_namespace: &str,
785) -> Result<Vec<SkosCollection>, CsvModelError> {
786    let ns = parse_rdm_namespace(rdm_namespace).map_err(|e| CsvModelError {
787        diagnostics: vec![CsvModelDiagnostic {
788            level: DiagnosticLevel::Error,
789            file: "(namespace)".to_string(),
790            line: None,
791            message: e,
792        }],
793    })?;
794
795    // Group rows by collection_name
796    let mut grouped: HashMap<&str, Vec<&CollectionRow>> = HashMap::new();
797    for row in &bundle.collections {
798        grouped.entry(&row.collection_name).or_default().push(row);
799    }
800
801    let mut collections = Vec::new();
802    for (name, rows) in &grouped {
803        let coll_uuid = generate_collection_uuid(&ns, name);
804        let collection_id = coll_uuid.to_string();
805
806        let label_value_id = generate_value_uuid(&collection_id, name, "en");
807        let pref_labels = {
808            let mut m = HashMap::new();
809            m.insert(
810                "en".to_string(),
811                SkosValue {
812                    id: label_value_id.to_string(),
813                    value: name.to_string(),
814                },
815            );
816            m
817        };
818
819        // Build concept hierarchy: first create all concepts, then link children
820        let mut concept_map: HashMap<&str, SkosConcept> = HashMap::new();
821        let mut all_concepts: HashMap<String, SkosConcept> = HashMap::new();
822        let mut values: HashMap<String, SkosValue> = HashMap::new();
823
824        // First pass: create all concepts
825        for row in rows {
826            let concept_uuid = generate_concept_uuid(&coll_uuid, &row.concept_label);
827            let concept_id = concept_uuid.to_string();
828
829            let label_vid = generate_value_uuid(&concept_id, &row.concept_label, "en");
830            let concept = SkosConcept {
831                id: concept_id.clone(),
832                uri: None,
833                pref_labels: {
834                    let mut m = HashMap::new();
835                    m.insert(
836                        "en".to_string(),
837                        SkosValue {
838                            id: label_vid.to_string(),
839                            value: row.concept_label.clone(),
840                        },
841                    );
842                    m
843                },
844                source: None,
845                sort_order: row.sort_order,
846                children: Some(Vec::new()),
847            };
848            values.insert(
849                label_vid.to_string(),
850                SkosValue {
851                    id: label_vid.to_string(),
852                    value: row.concept_label.clone(),
853                },
854            );
855            all_concepts.insert(concept_id, concept.clone());
856            concept_map.insert(&row.concept_label, concept);
857        }
858
859        // Second pass: build hierarchy by cloning children into parents
860        let mut top_level_labels: Vec<&str> = Vec::new();
861        for row in rows {
862            if let Some(ref parent_label) = row.parent_label {
863                // Clone child first to avoid simultaneous borrow
864                let child = concept_map.get(row.concept_label.as_str()).cloned();
865                if let (Some(parent), Some(child)) =
866                    (concept_map.get_mut(parent_label.as_str()), child)
867                {
868                    if let Some(ref mut children) = parent.children {
869                        children.push(child);
870                    }
871                }
872            } else {
873                top_level_labels.push(&row.concept_label);
874            }
875        }
876
877        // Top-level concepts map (excludes children)
878        let mut top_concepts: HashMap<String, SkosConcept> = HashMap::new();
879        for label in &top_level_labels {
880            if let Some(concept) = concept_map.get(label) {
881                top_concepts.insert(concept.id.clone(), concept.clone());
882            }
883        }
884
885        collections.push(SkosCollection {
886            id: collection_id,
887            uri: None,
888            pref_labels,
889            alt_labels: HashMap::new(),
890            scope_notes: HashMap::new(),
891            node_type: SkosNodeType::ConceptScheme,
892            concepts: top_concepts,
893            all_concepts,
894            values,
895        });
896    }
897
898    Ok(collections)
899}
900
901/// Build a graph and collections from the 3-CSV format.
902///
903/// This is the main entry point. It parses, validates, converts to
904/// instructions, and builds the graph via the standard mutation pipeline.
905///
906/// # Arguments
907/// * `graph_csv` - Contents of graph.csv
908/// * `nodes_csv` - Contents of nodes.csv
909/// * `collections_csv` - Contents of collections.csv (optional)
910/// * `rdm_namespace` - RDM namespace string (UUID or URL) for deterministic ID generation
911/// * `options` - Mutator options (autocreate cards/widgets, ontology validation)
912///
913/// Returns the built graph and any SKOS collections, or an error with diagnostics.
914pub fn build_graph_from_model_csvs(
915    graph_csv: &str,
916    nodes_csv: &str,
917    collections_csv: Option<&str>,
918    rdm_namespace: &str,
919    options: MutatorOptions,
920) -> Result<(StaticGraph, Vec<SkosCollection>), CsvModelError> {
921    let (bundle, mut diagnostics) = parse_model_csvs(graph_csv, nodes_csv, collections_csv)?;
922
923    // Validate
924    let validation = validate_model_csvs(&bundle);
925    let has_errors = validation.iter().any(|d| d.level == DiagnosticLevel::Error);
926    diagnostics.extend(validation);
927
928    if has_errors {
929        return Err(CsvModelError { diagnostics });
930    }
931
932    // Convert to instructions and build
933    let instructions = model_csvs_to_instructions(&bundle, rdm_namespace)?;
934    let graph = crate::graph_mutator::build_graph_from_instructions(instructions, options)
935        .map_err(|e| CsvModelError {
936            diagnostics: {
937                diagnostics.push(CsvModelDiagnostic {
938                    level: DiagnosticLevel::Error,
939                    file: "(build)".to_string(),
940                    line: None,
941                    message: e,
942                });
943                diagnostics
944            },
945        })?;
946
947    let collections = model_csvs_to_collections(&bundle, rdm_namespace)?;
948
949    Ok((graph, collections))
950}
951
952/// Validate CSVs and return diagnostics without building.
953///
954/// Convenience function that parses and validates the 3-CSV format,
955/// returning all diagnostics. No graph is built, so no namespace is needed.
956pub fn validate_model_csvs_from_strings(
957    graph_csv: &str,
958    nodes_csv: &str,
959    collections_csv: Option<&str>,
960) -> Vec<CsvModelDiagnostic> {
961    match parse_model_csvs(graph_csv, nodes_csv, collections_csv) {
962        Ok((bundle, mut parse_diags)) => {
963            let validation = validate_model_csvs(&bundle);
964            parse_diags.extend(validation);
965            parse_diags
966        }
967        Err(e) => e.diagnostics,
968    }
969}
970
971// --- Helpers ---
972
973fn build_collection_id_map<'a>(
974    bundle: &'a ModelCsvBundle,
975    namespace: &Uuid,
976) -> HashMap<&'a str, String> {
977    let mut map = HashMap::new();
978    let mut seen: HashSet<&str> = HashSet::new();
979    for row in &bundle.collections {
980        if seen.insert(&row.collection_name) {
981            map.insert(
982                row.collection_name.as_str(),
983                generate_collection_uuid(namespace, &row.collection_name).to_string(),
984            );
985        }
986    }
987    map
988}
989
990fn topological_sort(nodes: &[NodeRow]) -> Vec<&NodeRow> {
991    let by_alias: HashMap<&str, &NodeRow> = nodes.iter().map(|n| (n.alias.as_str(), n)).collect();
992    let mut visited: HashSet<&str> = HashSet::new();
993    let mut sorted: Vec<&NodeRow> = Vec::new();
994
995    fn visit<'a>(
996        alias: &'a str,
997        by_alias: &HashMap<&str, &'a NodeRow>,
998        visited: &mut HashSet<&'a str>,
999        sorted: &mut Vec<&'a NodeRow>,
1000    ) {
1001        if visited.contains(alias) {
1002            return;
1003        }
1004        visited.insert(alias);
1005        if let Some(node) = by_alias.get(alias) {
1006            if let Some(ref pa) = node.parent_alias {
1007                if by_alias.contains_key(pa.as_str()) {
1008                    visit(pa, by_alias, visited, sorted);
1009                }
1010            }
1011            sorted.push(node);
1012        }
1013    }
1014
1015    for node in nodes {
1016        visit(&node.alias, &by_alias, &mut visited, &mut sorted);
1017    }
1018    sorted
1019}
1020
#[cfg(test)]
mod tests {
    use super::*;

    /// Namespace handed to the ID-generating functions in these tests.
    const TEST_NAMESPACE: &str = "http://test.example.org/rdm/";

    /// graph.csv fixture: header plus a single data row.
    const GRAPH_CSV: &str = r#"name,ontology_class,author,description,is_resource
Heritage Monument,http://www.cidoc-crm.org/cidoc-crm/E24_Physical_Human-Made_Thing,,A heritage monument,true"#;

    /// nodes.csv fixture: three top-level nodes, two of them under `location`.
    const NODES_CSV: &str = r#"parent_alias,alias,name,datatype,cardinality,ontology_class,parent_property,description,collection_name,required,searchable,exportable,sortorder
,name,Name,string,1,http://www.cidoc-crm.org/cidoc-crm/E41_Appellation,http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by,Primary name,,true,true,true,1
,monument_type,Monument Type,concept,1,http://www.cidoc-crm.org/cidoc-crm/E55_Type,http://www.cidoc-crm.org/cidoc-crm/P2_has_type,Type classification,Monument Types,true,true,true,2
,location,Location,semantic,n,http://www.cidoc-crm.org/cidoc-crm/E53_Place,http://www.cidoc-crm.org/cidoc-crm/P53_has_former_or_current_location,,,false,true,true,3
location,place_name,Place Name,string,1,http://www.cidoc-crm.org/cidoc-crm/E44_Place_Appellation,http://www.cidoc-crm.org/cidoc-crm/P87_is_identified_by,,,true,true,true,1
location,geometry,Geometry,geojson-feature-collection,1,http://www.cidoc-crm.org/cidoc-crm/E94_Space_Primitive,http://www.cidoc-crm.org/cidoc-crm/P168_place_is_defined_by,,,false,false,true,2"#;

    /// collections.csv fixture: one collection, with `Motte` nested under `Castle`.
    const COLLECTIONS_CSV: &str = r#"collection_name,concept_label,parent_label,sort_order
Monument Types,Castle,,1
Monument Types,Church,,2
Monument Types,Bridge,,3
Monument Types,Fortification,,4
Monument Types,Motte,Castle,5"#;

    /// The fixtures parse without error diagnostics and pass validation.
    #[test]
    fn test_parse_and_validate() {
        let parsed = parse_model_csvs(GRAPH_CSV, NODES_CSV, Some(COLLECTIONS_CSV));
        let (bundle, parse_diags) = parsed.unwrap();

        let parse_failed = parse_diags
            .iter()
            .any(|d| d.level == DiagnosticLevel::Error);
        assert!(!parse_failed);
        assert_eq!(bundle.graph.name, "Heritage Monument");
        assert_eq!(bundle.nodes.len(), 5);
        assert_eq!(bundle.collections.len(), 5);

        let validation = validate_model_csvs(&bundle);
        let mut errors = Vec::new();
        for diag in &validation {
            if diag.level == DiagnosticLevel::Error {
                errors.push(diag);
            }
        }
        assert!(errors.is_empty(), "Unexpected errors: {:?}", errors);
    }

    /// Instructions start with `create_model` and list parents before children.
    #[test]
    fn test_to_instructions() {
        let (bundle, _) = parse_model_csvs(GRAPH_CSV, NODES_CSV, Some(COLLECTIONS_CSV)).unwrap();
        let instructions = model_csvs_to_instructions(&bundle, TEST_NAMESPACE).unwrap();

        assert_eq!(instructions[0].action, "create_model");
        // 1 create + 5 nodes
        assert_eq!(instructions.len(), 6);

        // Verify topological order: location before place_name and geometry
        let mut aliases: Vec<&str> = Vec::new();
        for instruction in &instructions {
            if instruction.action == "add_node" {
                aliases.push(instruction.object.as_str());
            }
        }
        let index_of = |wanted: &str| aliases.iter().position(|a| *a == wanted).unwrap();
        let loc_idx = index_of("location");
        let pn_idx = index_of("place_name");
        let geo_idx = index_of("geometry");
        assert!(loc_idx < pn_idx);
        assert!(loc_idx < geo_idx);
    }

    /// Child concepts are counted in `all_concepts` but not at the top level.
    #[test]
    fn test_to_collections() {
        let (bundle, _) = parse_model_csvs(GRAPH_CSV, NODES_CSV, Some(COLLECTIONS_CSV)).unwrap();
        let collections = model_csvs_to_collections(&bundle, TEST_NAMESPACE).unwrap();

        assert_eq!(collections.len(), 1);
        let monument_types = &collections[0];
        assert_eq!(monument_types.all_concepts.len(), 5);
        // Motte should be a child of Castle, not at top level:
        // Castle, Church, Bridge, Fortification remain at the top.
        assert_eq!(monument_types.concepts.len(), 4);
    }

    /// The end-to-end build yields the expected node, edge and collection counts.
    #[test]
    fn test_build_graph() {
        let (graph, collections) = build_graph_from_model_csvs(
            GRAPH_CSV,
            NODES_CSV,
            Some(COLLECTIONS_CSV),
            TEST_NAMESPACE,
            MutatorOptions::default(),
        )
        .unwrap();

        // root + 5 nodes
        assert_eq!(graph.nodes.len(), 6);
        assert_eq!(graph.edges.len(), 5);
        assert_eq!(collections.len(), 1);
    }

    /// A `parent_alias` that matches no node is reported as an error.
    #[test]
    fn test_dangling_parent_alias() {
        let bad_nodes = r#"parent_alias,alias,name,datatype,cardinality,ontology_class,parent_property
nonexistent,child,Child,string,1,http://www.cidoc-crm.org/cidoc-crm/E62_String,http://www.cidoc-crm.org/cidoc-crm/P3_has_note"#;
        let diags = validate_model_csvs_from_strings(GRAPH_CSV, bad_nodes, None);

        let reported = diags
            .iter()
            .any(|d| d.level == DiagnosticLevel::Error && d.message.contains("nonexistent"));
        assert!(reported);
    }

    /// A concept node naming an undefined collection is reported as a warning.
    #[test]
    fn test_missing_collection() {
        // Missing collection refs are warnings (not errors) — see validate_model_csvs
        // for the rationale (external SKOS imports are a valid workflow).
        let nodes_with_concept = r#"parent_alias,alias,name,datatype,cardinality,ontology_class,parent_property,collection_name
,my_type,Type,concept,1,http://www.cidoc-crm.org/cidoc-crm/E55_Type,http://www.cidoc-crm.org/cidoc-crm/P2_has_type,Missing Collection"#;
        let diags = validate_model_csvs_from_strings(GRAPH_CSV, nodes_with_concept, None);

        let warned = diags.iter().any(|d| {
            d.level == DiagnosticLevel::Warning && d.message.contains("Missing Collection")
        });
        assert!(warned);
    }
}