1use std::collections::{HashMap, HashSet};
27
28use uuid::Uuid;
29
30use crate::graph_mutator::{GraphInstruction, MutatorOptions};
31use crate::rdm_namespace::{
32 generate_collection_uuid, generate_concept_uuid, generate_value_uuid, parse_rdm_namespace,
33};
34use crate::skos::{SkosCollection, SkosConcept, SkosNodeType, SkosValue};
35use crate::StaticGraph;
36
/// Severity attached to a [`CsvModelDiagnostic`].
///
/// `build_graph_from_model_csvs` refuses to build a graph when validation
/// reports at least one diagnostic at [`DiagnosticLevel::Error`].
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
pub enum DiagnosticLevel {
    /// Rendered as `ERROR` by the `Display` impl of [`CsvModelDiagnostic`].
    Error,
    /// Rendered as `WARN` by the `Display` impl of [`CsvModelDiagnostic`].
    Warning,
}
43
/// A single message produced while parsing or validating the model CSV files.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct CsvModelDiagnostic {
    /// Severity of the message.
    pub level: DiagnosticLevel,
    /// Label of the input the message refers to, e.g. `"graph.csv"`,
    /// `"nodes.csv"`, `"collections.csv"`, `"(namespace)"` or `"(build)"`.
    pub file: String,
    /// Line the message refers to, when one is known. The parsers in this
    /// module report the header row as line 1 and data rows from line 2.
    pub line: Option<usize>,
    /// Human-readable description of the problem.
    pub message: String,
}
52
53impl std::fmt::Display for CsvModelDiagnostic {
54 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
55 let level = match self.level {
56 DiagnosticLevel::Error => "ERROR",
57 DiagnosticLevel::Warning => "WARN",
58 };
59 if let Some(line) = self.line {
60 write!(f, "[{}] {}:{}: {}", level, self.file, line, self.message)
61 } else {
62 write!(f, "[{}] {}: {}", level, self.file, self.message)
63 }
64 }
65}
66
/// Error type returned by the fallible functions of this module; it carries
/// the diagnostics that describe the failure.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct CsvModelError {
    /// Diagnostics describing the failure, in the order they were recorded.
    pub diagnostics: Vec<CsvModelDiagnostic>,
}
72
73impl std::fmt::Display for CsvModelError {
74 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
75 for d in &self.diagnostics {
76 writeln!(f, "{}", d)?;
77 }
78 Ok(())
79 }
80}
81
// Marker impl: relies on the `Debug` and `Display` impls of `CsvModelError`
// and keeps the default `source()` (no underlying cause is exposed).
impl std::error::Error for CsvModelError {}
83
/// The single data row of `graph.csv`.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct GraphRow {
    /// Model name; `parse_graph_csv` rejects a row where it is missing.
    pub name: String,
    /// Raw `ontology_class` cell. May list several classes separated by `|`
    /// (see `split_class_cell`).
    pub ontology_class: Option<String>,
    /// Raw `author` cell, if present and non-empty.
    pub author: Option<String>,
    /// Raw `description` cell, if present and non-empty.
    pub description: Option<String>,
    /// `Some(true)` only when the `is_resource` cell is exactly `true`;
    /// any other non-empty value yields `Some(false)`.
    pub is_resource: Option<bool>,
}
93
/// One data row of `nodes.csv`.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct NodeRow {
    /// Alias of the parent node; `None` attaches the node to the model root
    /// when instructions are generated.
    pub parent_alias: Option<String>,
    /// Alias of this node. Validation reports duplicates as errors.
    pub alias: String,
    /// Display name of the node.
    pub name: String,
    /// Datatype name; validated against `VALID_DATATYPES`.
    pub datatype: String,
    /// Cardinality; validation accepts only `"1"` and `"n"`.
    pub cardinality: String,
    /// Raw `ontology_class` cell. May list several classes separated by `|`.
    pub ontology_class: String,
    /// Raw `parent_property` cell.
    pub parent_property: String,
    /// Optional free-text description.
    pub description: Option<String>,
    /// Name of the collection referenced by `concept` / `concept-list` nodes.
    pub collection_name: Option<String>,
    /// Parsed from the `required` cell: `true` only for the exact text `true`.
    pub required: Option<bool>,
    /// Parsed from the `searchable` cell: `false` only for the exact text `false`.
    pub searchable: Option<bool>,
    /// Parsed from the `exportable` cell: `true` only for the exact text `true`.
    pub exportable: Option<bool>,
    /// Parsed from the `sortorder` cell; `None` when absent or not an `i32`.
    pub sortorder: Option<i32>,
}
111
/// One data row of `collections.csv`: a concept belonging to a named collection.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct CollectionRow {
    /// Name of the collection the concept belongs to.
    pub collection_name: String,
    /// Label of the concept.
    pub concept_label: String,
    /// Label of the parent concept within the same collection; `None` marks a
    /// top-level concept.
    pub parent_label: Option<String>,
    /// Parsed from the `sort_order` cell; `None` when absent or not an `i32`.
    pub sort_order: Option<i32>,
}
120
/// Parsed contents of the three model CSV files, as returned by
/// `parse_model_csvs`.
#[derive(Debug, Clone)]
pub struct ModelCsvBundle {
    /// The single row of `graph.csv`.
    pub graph: GraphRow,
    /// Rows of `nodes.csv` that were kept by the parser.
    pub nodes: Vec<NodeRow>,
    /// Rows of `collections.csv`; empty when no collections file was supplied.
    pub collections: Vec<CollectionRow>,
}
128
/// Datatype names accepted in the `datatype` column of `nodes.csv`; any other
/// value is reported as an error by `validate_model_csvs`.
const VALID_DATATYPES: &[&str] = &[
    "semantic",
    "string",
    "concept",
    "concept-list",
    "number",
    "date",
    "boolean",
    "geojson-feature-collection",
    "domain-value",
    "domain-value-list",
    "file-list",
    "resource-instance",
    "resource-instance-list",
];
144
/// URI prefix that ontology classes and parent properties are expected to
/// start with; values without it only produce a warning during validation.
const CRM_PREFIX: &str = "http://www.cidoc-crm.org/cidoc-crm/";
146
/// Splits a `|`-separated cell into its individual entries.
///
/// Each entry is trimmed; entries that are empty after trimming are dropped,
/// so an empty or all-separator cell yields an empty vector.
fn split_class_cell(raw: &str) -> Vec<String> {
    let mut classes = Vec::new();
    for piece in raw.split('|') {
        let piece = piece.trim();
        if !piece.is_empty() {
            classes.push(piece.to_owned());
        }
    }
    classes
}
157
158fn get_field<'a>(
159 record: &'a csv::StringRecord,
160 headers: &csv::StringRecord,
161 name: &str,
162) -> Option<&'a str> {
163 headers
164 .iter()
165 .position(|h| h == name)
166 .and_then(|i| record.get(i))
167 .map(|s| s.trim())
168 .filter(|s| !s.is_empty())
169}
170
171fn get_field_required<'a>(
172 record: &'a csv::StringRecord,
173 headers: &csv::StringRecord,
174 name: &str,
175 file: &str,
176 line: usize,
177 diagnostics: &mut Vec<CsvModelDiagnostic>,
178) -> Option<&'a str> {
179 match get_field(record, headers, name) {
180 Some(v) => Some(v),
181 None => {
182 diagnostics.push(CsvModelDiagnostic {
183 level: DiagnosticLevel::Error,
184 file: file.to_string(),
185 line: Some(line),
186 message: format!("missing required field \"{}\"", name),
187 });
188 None
189 }
190 }
191}
192
193pub fn parse_model_csvs(
199 graph_csv: &str,
200 nodes_csv: &str,
201 collections_csv: Option<&str>,
202) -> Result<(ModelCsvBundle, Vec<CsvModelDiagnostic>), CsvModelError> {
203 let mut diagnostics = Vec::new();
204
205 let graph = parse_graph_csv(graph_csv, &mut diagnostics)?;
207
208 let nodes = parse_nodes_csv(nodes_csv, &mut diagnostics);
210
211 let collections = if let Some(csv) = collections_csv {
213 parse_collections_csv(csv, &mut diagnostics)
214 } else {
215 Vec::new()
216 };
217
218 let bundle = ModelCsvBundle {
219 graph,
220 nodes,
221 collections,
222 };
223 Ok((bundle, diagnostics))
224}
225
226fn parse_graph_csv(
227 csv_text: &str,
228 diagnostics: &mut Vec<CsvModelDiagnostic>,
229) -> Result<GraphRow, CsvModelError> {
230 let mut reader = csv::Reader::from_reader(csv_text.as_bytes());
231 let headers = reader
232 .headers()
233 .map_err(|e| CsvModelError {
234 diagnostics: vec![CsvModelDiagnostic {
235 level: DiagnosticLevel::Error,
236 file: "graph.csv".to_string(),
237 line: Some(1),
238 message: format!("failed to parse headers: {}", e),
239 }],
240 })?
241 .clone();
242
243 let record = reader
244 .records()
245 .next()
246 .ok_or_else(|| CsvModelError {
247 diagnostics: vec![CsvModelDiagnostic {
248 level: DiagnosticLevel::Error,
249 file: "graph.csv".to_string(),
250 line: None,
251 message: "expected exactly 1 data row".to_string(),
252 }],
253 })?
254 .map_err(|e| CsvModelError {
255 diagnostics: vec![CsvModelDiagnostic {
256 level: DiagnosticLevel::Error,
257 file: "graph.csv".to_string(),
258 line: Some(2),
259 message: format!("failed to parse row: {}", e),
260 }],
261 })?;
262
263 let name = get_field_required(&record, &headers, "name", "graph.csv", 2, diagnostics)
264 .unwrap_or("")
265 .to_string();
266
267 if name.is_empty() {
268 return Err(CsvModelError {
269 diagnostics: vec![CsvModelDiagnostic {
270 level: DiagnosticLevel::Error,
271 file: "graph.csv".to_string(),
272 line: Some(2),
273 message: "\"name\" is required".to_string(),
274 }],
275 });
276 }
277
278 Ok(GraphRow {
279 name,
280 ontology_class: get_field(&record, &headers, "ontology_class").map(String::from),
281 author: get_field(&record, &headers, "author").map(String::from),
282 description: get_field(&record, &headers, "description").map(String::from),
283 is_resource: get_field(&record, &headers, "is_resource").map(|v| v == "true"),
284 })
285}
286
287fn parse_nodes_csv(csv_text: &str, diagnostics: &mut Vec<CsvModelDiagnostic>) -> Vec<NodeRow> {
288 let mut reader = csv::Reader::from_reader(csv_text.as_bytes());
289 let headers = match reader.headers() {
290 Ok(h) => h.clone(),
291 Err(e) => {
292 diagnostics.push(CsvModelDiagnostic {
293 level: DiagnosticLevel::Error,
294 file: "nodes.csv".to_string(),
295 line: Some(1),
296 message: format!("failed to parse headers: {}", e),
297 });
298 return Vec::new();
299 }
300 };
301
302 let mut rows = Vec::new();
303 for (i, result) in reader.records().enumerate() {
304 let line = i + 2;
305 let record = match result {
306 Ok(r) => r,
307 Err(e) => {
308 diagnostics.push(CsvModelDiagnostic {
309 level: DiagnosticLevel::Error,
310 file: "nodes.csv".to_string(),
311 line: Some(line),
312 message: format!("failed to parse row: {}", e),
313 });
314 continue;
315 }
316 };
317
318 let alias = get_field_required(&record, &headers, "alias", "nodes.csv", line, diagnostics)
319 .unwrap_or("")
320 .to_string();
321 let name = get_field_required(&record, &headers, "name", "nodes.csv", line, diagnostics)
322 .unwrap_or("")
323 .to_string();
324 let datatype = get_field_required(
325 &record,
326 &headers,
327 "datatype",
328 "nodes.csv",
329 line,
330 diagnostics,
331 )
332 .unwrap_or("")
333 .to_string();
334 let cardinality = get_field_required(
335 &record,
336 &headers,
337 "cardinality",
338 "nodes.csv",
339 line,
340 diagnostics,
341 )
342 .unwrap_or("1")
343 .to_string();
344 let ontology_class = get_field_required(
345 &record,
346 &headers,
347 "ontology_class",
348 "nodes.csv",
349 line,
350 diagnostics,
351 )
352 .unwrap_or("")
353 .to_string();
354 let parent_property = get_field_required(
355 &record,
356 &headers,
357 "parent_property",
358 "nodes.csv",
359 line,
360 diagnostics,
361 )
362 .unwrap_or("")
363 .to_string();
364
365 if alias.is_empty() {
366 continue; }
368
369 rows.push(NodeRow {
370 parent_alias: get_field(&record, &headers, "parent_alias").map(String::from),
371 alias,
372 name,
373 datatype,
374 cardinality,
375 ontology_class,
376 parent_property,
377 description: get_field(&record, &headers, "description").map(String::from),
378 collection_name: get_field(&record, &headers, "collection_name").map(String::from),
379 required: get_field(&record, &headers, "required").map(|v| v == "true"),
380 searchable: get_field(&record, &headers, "searchable").map(|v| v != "false"),
381 exportable: get_field(&record, &headers, "exportable").map(|v| v == "true"),
382 sortorder: get_field(&record, &headers, "sortorder").and_then(|v| v.parse().ok()),
383 });
384 }
385 rows
386}
387
388fn parse_collections_csv(
389 csv_text: &str,
390 diagnostics: &mut Vec<CsvModelDiagnostic>,
391) -> Vec<CollectionRow> {
392 let mut reader = csv::Reader::from_reader(csv_text.as_bytes());
393 let headers = match reader.headers() {
394 Ok(h) => h.clone(),
395 Err(e) => {
396 diagnostics.push(CsvModelDiagnostic {
397 level: DiagnosticLevel::Error,
398 file: "collections.csv".to_string(),
399 line: Some(1),
400 message: format!("failed to parse headers: {}", e),
401 });
402 return Vec::new();
403 }
404 };
405
406 let mut rows = Vec::new();
407 for (i, result) in reader.records().enumerate() {
408 let line = i + 2;
409 let record = match result {
410 Ok(r) => r,
411 Err(e) => {
412 diagnostics.push(CsvModelDiagnostic {
413 level: DiagnosticLevel::Error,
414 file: "collections.csv".to_string(),
415 line: Some(line),
416 message: format!("failed to parse row: {}", e),
417 });
418 continue;
419 }
420 };
421
422 let collection_name = get_field_required(
423 &record,
424 &headers,
425 "collection_name",
426 "collections.csv",
427 line,
428 diagnostics,
429 )
430 .unwrap_or("")
431 .to_string();
432 let concept_label = get_field_required(
433 &record,
434 &headers,
435 "concept_label",
436 "collections.csv",
437 line,
438 diagnostics,
439 )
440 .unwrap_or("")
441 .to_string();
442
443 if collection_name.is_empty() || concept_label.is_empty() {
444 continue;
445 }
446
447 rows.push(CollectionRow {
448 collection_name,
449 concept_label,
450 parent_label: get_field(&record, &headers, "parent_label").map(String::from),
451 sort_order: get_field(&record, &headers, "sort_order").and_then(|v| v.parse().ok()),
452 });
453 }
454 rows
455}
456
457pub fn validate_model_csvs(bundle: &ModelCsvBundle) -> Vec<CsvModelDiagnostic> {
462 let mut diagnostics = Vec::new();
463
464 let mut aliases: HashSet<&str> = HashSet::new();
466 let mut parent_aliases: HashSet<&str> = HashSet::new();
467 let mut collection_refs: HashSet<&str> = HashSet::new();
468
469 for (i, node) in bundle.nodes.iter().enumerate() {
470 let line = i + 2;
471
472 if !aliases.insert(&node.alias) {
474 diagnostics.push(CsvModelDiagnostic {
475 level: DiagnosticLevel::Error,
476 file: "nodes.csv".to_string(),
477 line: Some(line),
478 message: format!("duplicate alias \"{}\"", node.alias),
479 });
480 }
481
482 if !VALID_DATATYPES.contains(&node.datatype.as_str()) {
484 diagnostics.push(CsvModelDiagnostic {
485 level: DiagnosticLevel::Error,
486 file: "nodes.csv".to_string(),
487 line: Some(line),
488 message: format!("invalid datatype \"{}\"", node.datatype),
489 });
490 }
491
492 if node.cardinality != "1" && node.cardinality != "n" {
494 diagnostics.push(CsvModelDiagnostic {
495 level: DiagnosticLevel::Error,
496 file: "nodes.csv".to_string(),
497 line: Some(line),
498 message: format!(
499 "invalid cardinality \"{}\" (must be \"1\" or \"n\")",
500 node.cardinality
501 ),
502 });
503 }
504
505 for class in split_class_cell(&node.ontology_class) {
509 if !class.starts_with(CRM_PREFIX) {
510 diagnostics.push(CsvModelDiagnostic {
511 level: DiagnosticLevel::Warning,
512 file: "nodes.csv".to_string(),
513 line: Some(line),
514 message: format!("ontology_class \"{}\" does not use CIDOC-CRM prefix", class),
515 });
516 }
517 }
518 if !node.parent_property.starts_with(CRM_PREFIX) && !node.parent_property.is_empty() {
519 diagnostics.push(CsvModelDiagnostic {
520 level: DiagnosticLevel::Warning,
521 file: "nodes.csv".to_string(),
522 line: Some(line),
523 message: format!(
524 "parent_property \"{}\" does not use CIDOC-CRM prefix",
525 node.parent_property
526 ),
527 });
528 }
529
530 if (node.datatype == "concept" || node.datatype == "concept-list")
532 && node.collection_name.is_none()
533 {
534 diagnostics.push(CsvModelDiagnostic {
535 level: DiagnosticLevel::Warning,
536 file: "nodes.csv".to_string(),
537 line: Some(line),
538 message: format!("concept node \"{}\" has no collection_name", node.alias),
539 });
540 }
541
542 if let Some(ref cn) = node.collection_name {
543 collection_refs.insert(cn.as_str());
544 }
545
546 if let Some(ref pa) = node.parent_alias {
547 parent_aliases.insert(pa.as_str());
548 }
549 }
550
551 for (i, node) in bundle.nodes.iter().enumerate() {
553 if let Some(ref pa) = node.parent_alias {
554 if !aliases.contains(pa.as_str()) {
555 diagnostics.push(CsvModelDiagnostic {
556 level: DiagnosticLevel::Error,
557 file: "nodes.csv".to_string(),
558 line: Some(i + 2),
559 message: format!("parent_alias \"{}\" not found in defined aliases", pa),
560 });
561 }
562 }
563 }
564
565 for node in &bundle.nodes {
567 if node.datatype == "semantic" && !parent_aliases.contains(node.alias.as_str()) {
568 diagnostics.push(CsvModelDiagnostic {
569 level: DiagnosticLevel::Warning,
570 file: "nodes.csv".to_string(),
571 line: None,
572 message: format!("semantic node \"{}\" has no children", node.alias),
573 });
574 }
575 }
576
577 let mut collection_names: HashSet<&str> = HashSet::new();
579 let mut concepts_by_collection: HashMap<&str, HashSet<&str>> = HashMap::new();
580
581 for (i, row) in bundle.collections.iter().enumerate() {
582 let line = i + 2;
583 collection_names.insert(&row.collection_name);
584
585 let labels = concepts_by_collection
586 .entry(&row.collection_name)
587 .or_default();
588 labels.insert(&row.concept_label);
589
590 if let Some(ref parent) = row.parent_label {
592 if !labels.contains(parent.as_str()) {
593 }
596 if parent == &row.concept_label {
598 diagnostics.push(CsvModelDiagnostic {
599 level: DiagnosticLevel::Error,
600 file: "collections.csv".to_string(),
601 line: Some(line),
602 message: format!(
603 "concept \"{}\" references itself as parent",
604 row.concept_label
605 ),
606 });
607 }
608 }
609 }
610
611 for (i, row) in bundle.collections.iter().enumerate() {
613 if let Some(ref parent) = row.parent_label {
614 if let Some(labels) = concepts_by_collection.get(row.collection_name.as_str()) {
615 if !labels.contains(parent.as_str()) {
616 diagnostics.push(CsvModelDiagnostic {
617 level: DiagnosticLevel::Error,
618 file: "collections.csv".to_string(),
619 line: Some(i + 2),
620 message: format!(
621 "parent_label \"{}\" not found in collection \"{}\"",
622 parent, row.collection_name
623 ),
624 });
625 }
626 }
627 }
628 }
629
630 for cn in &collection_refs {
638 if !collection_names.contains(cn) {
639 diagnostics.push(CsvModelDiagnostic {
640 level: DiagnosticLevel::Warning,
641 file: "nodes.csv".to_string(),
642 line: None,
643 message: format!(
644 "references collection \"{}\" but it is not defined in collections.csv (expected if loaded from external SKOS)",
645 cn
646 ),
647 });
648 }
649 }
650
651 if let Some(ref oc) = bundle.graph.ontology_class {
653 for class in split_class_cell(oc) {
654 if !class.starts_with(CRM_PREFIX) {
655 diagnostics.push(CsvModelDiagnostic {
656 level: DiagnosticLevel::Warning,
657 file: "graph.csv".to_string(),
658 line: None,
659 message: format!("ontology_class \"{}\" does not use CIDOC-CRM prefix", class),
660 });
661 }
662 }
663 }
664
665 diagnostics
666}
667
668pub fn model_csvs_to_instructions(
679 bundle: &ModelCsvBundle,
680 rdm_namespace: &str,
681) -> Result<Vec<GraphInstruction>, CsvModelError> {
682 let ns = parse_rdm_namespace(rdm_namespace).map_err(|e| CsvModelError {
683 diagnostics: vec![CsvModelDiagnostic {
684 level: DiagnosticLevel::Error,
685 file: "(namespace)".to_string(),
686 line: None,
687 message: e,
688 }],
689 })?;
690
691 let mut instructions = Vec::new();
692
693 let root_alias = crate::graph_mutator::slugify(&bundle.graph.name);
695
696 let mut create = GraphInstruction::new("create_model", &root_alias, "");
698 create = create.with_str("name", &bundle.graph.name);
699 if let Some(ref oc) = bundle.graph.ontology_class {
700 let classes = split_class_cell(oc);
701 if classes.len() == 1 {
702 create = create.with_str("ontology_class", &classes[0]);
703 } else if !classes.is_empty() {
704 create = create.with_param(
705 "ontology_class",
706 serde_json::Value::Array(
707 classes.into_iter().map(serde_json::Value::String).collect(),
708 ),
709 );
710 }
711 }
712 instructions.push(create);
713
714 let collection_ids = build_collection_id_map(bundle, &ns);
716
717 let sorted = topological_sort(&bundle.nodes);
719
720 for node in &sorted {
722 let subject = match &node.parent_alias {
723 Some(pa) => pa.as_str(),
724 None => root_alias.as_str(),
725 };
726
727 let mut instr = GraphInstruction::new("add_node", subject, &node.alias);
728 instr = instr.with_str("name", &node.name);
729 instr = instr.with_str("datatype", &node.datatype);
730 instr = instr.with_str("cardinality", &node.cardinality);
731 let classes = split_class_cell(&node.ontology_class);
733 if classes.len() == 1 {
734 instr = instr.with_str("ontology_class", &classes[0]);
735 } else if !classes.is_empty() {
736 instr = instr.with_param(
737 "ontology_class",
738 serde_json::Value::Array(
739 classes.into_iter().map(serde_json::Value::String).collect(),
740 ),
741 );
742 }
743 instr = instr.with_str("parent_property", &node.parent_property);
744
745 if let Some(ref desc) = node.description {
746 instr = instr.with_str("description", desc);
747 }
748 if let Some(req) = node.required {
749 instr = instr.with_param("isrequired", serde_json::Value::Bool(req));
750 }
751 if let Some(search) = node.searchable {
752 instr = instr.with_param("issearchable", serde_json::Value::Bool(search));
753 }
754 if let Some(exp) = node.exportable {
755 instr = instr.with_param("exportable", serde_json::Value::Bool(exp));
756 }
757 if let Some(so) = node.sortorder {
758 instr = instr.with_param("sortorder", serde_json::Value::Number(so.into()));
759 }
760
761 if node.datatype == "concept" || node.datatype == "concept-list" {
763 if let Some(ref cn) = node.collection_name {
764 if let Some(cid) = collection_ids.get(cn.as_str()) {
765 let config = serde_json::json!({ "rdmCollection": cid });
766 instr = instr.with_param("config", config);
767 }
768 }
769 }
770
771 instructions.push(instr);
772 }
773
774 Ok(instructions)
775}
776
777pub fn model_csvs_to_collections(
783 bundle: &ModelCsvBundle,
784 rdm_namespace: &str,
785) -> Result<Vec<SkosCollection>, CsvModelError> {
786 let ns = parse_rdm_namespace(rdm_namespace).map_err(|e| CsvModelError {
787 diagnostics: vec![CsvModelDiagnostic {
788 level: DiagnosticLevel::Error,
789 file: "(namespace)".to_string(),
790 line: None,
791 message: e,
792 }],
793 })?;
794
795 let mut grouped: HashMap<&str, Vec<&CollectionRow>> = HashMap::new();
797 for row in &bundle.collections {
798 grouped.entry(&row.collection_name).or_default().push(row);
799 }
800
801 let mut collections = Vec::new();
802 for (name, rows) in &grouped {
803 let coll_uuid = generate_collection_uuid(&ns, name);
804 let collection_id = coll_uuid.to_string();
805
806 let label_value_id = generate_value_uuid(&collection_id, name, "en");
807 let pref_labels = {
808 let mut m = HashMap::new();
809 m.insert(
810 "en".to_string(),
811 SkosValue {
812 id: label_value_id.to_string(),
813 value: name.to_string(),
814 },
815 );
816 m
817 };
818
819 let mut concept_map: HashMap<&str, SkosConcept> = HashMap::new();
821 let mut all_concepts: HashMap<String, SkosConcept> = HashMap::new();
822 let mut values: HashMap<String, SkosValue> = HashMap::new();
823
824 for row in rows {
826 let concept_uuid = generate_concept_uuid(&coll_uuid, &row.concept_label);
827 let concept_id = concept_uuid.to_string();
828
829 let label_vid = generate_value_uuid(&concept_id, &row.concept_label, "en");
830 let concept = SkosConcept {
831 id: concept_id.clone(),
832 uri: None,
833 pref_labels: {
834 let mut m = HashMap::new();
835 m.insert(
836 "en".to_string(),
837 SkosValue {
838 id: label_vid.to_string(),
839 value: row.concept_label.clone(),
840 },
841 );
842 m
843 },
844 source: None,
845 sort_order: row.sort_order,
846 children: Some(Vec::new()),
847 };
848 values.insert(
849 label_vid.to_string(),
850 SkosValue {
851 id: label_vid.to_string(),
852 value: row.concept_label.clone(),
853 },
854 );
855 all_concepts.insert(concept_id, concept.clone());
856 concept_map.insert(&row.concept_label, concept);
857 }
858
859 let mut top_level_labels: Vec<&str> = Vec::new();
861 for row in rows {
862 if let Some(ref parent_label) = row.parent_label {
863 let child = concept_map.get(row.concept_label.as_str()).cloned();
865 if let (Some(parent), Some(child)) =
866 (concept_map.get_mut(parent_label.as_str()), child)
867 {
868 if let Some(ref mut children) = parent.children {
869 children.push(child);
870 }
871 }
872 } else {
873 top_level_labels.push(&row.concept_label);
874 }
875 }
876
877 let mut top_concepts: HashMap<String, SkosConcept> = HashMap::new();
879 for label in &top_level_labels {
880 if let Some(concept) = concept_map.get(label) {
881 top_concepts.insert(concept.id.clone(), concept.clone());
882 }
883 }
884
885 collections.push(SkosCollection {
886 id: collection_id,
887 uri: None,
888 pref_labels,
889 alt_labels: HashMap::new(),
890 scope_notes: HashMap::new(),
891 node_type: SkosNodeType::ConceptScheme,
892 concepts: top_concepts,
893 all_concepts,
894 values,
895 });
896 }
897
898 Ok(collections)
899}
900
901pub fn build_graph_from_model_csvs(
915 graph_csv: &str,
916 nodes_csv: &str,
917 collections_csv: Option<&str>,
918 rdm_namespace: &str,
919 options: MutatorOptions,
920) -> Result<(StaticGraph, Vec<SkosCollection>), CsvModelError> {
921 let (bundle, mut diagnostics) = parse_model_csvs(graph_csv, nodes_csv, collections_csv)?;
922
923 let validation = validate_model_csvs(&bundle);
925 let has_errors = validation.iter().any(|d| d.level == DiagnosticLevel::Error);
926 diagnostics.extend(validation);
927
928 if has_errors {
929 return Err(CsvModelError { diagnostics });
930 }
931
932 let instructions = model_csvs_to_instructions(&bundle, rdm_namespace)?;
934 let graph = crate::graph_mutator::build_graph_from_instructions(instructions, options)
935 .map_err(|e| CsvModelError {
936 diagnostics: {
937 diagnostics.push(CsvModelDiagnostic {
938 level: DiagnosticLevel::Error,
939 file: "(build)".to_string(),
940 line: None,
941 message: e,
942 });
943 diagnostics
944 },
945 })?;
946
947 let collections = model_csvs_to_collections(&bundle, rdm_namespace)?;
948
949 Ok((graph, collections))
950}
951
952pub fn validate_model_csvs_from_strings(
957 graph_csv: &str,
958 nodes_csv: &str,
959 collections_csv: Option<&str>,
960) -> Vec<CsvModelDiagnostic> {
961 match parse_model_csvs(graph_csv, nodes_csv, collections_csv) {
962 Ok((bundle, mut parse_diags)) => {
963 let validation = validate_model_csvs(&bundle);
964 parse_diags.extend(validation);
965 parse_diags
966 }
967 Err(e) => e.diagnostics,
968 }
969}
970
971fn build_collection_id_map<'a>(
974 bundle: &'a ModelCsvBundle,
975 namespace: &Uuid,
976) -> HashMap<&'a str, String> {
977 let mut map = HashMap::new();
978 let mut seen: HashSet<&str> = HashSet::new();
979 for row in &bundle.collections {
980 if seen.insert(&row.collection_name) {
981 map.insert(
982 row.collection_name.as_str(),
983 generate_collection_uuid(namespace, &row.collection_name).to_string(),
984 );
985 }
986 }
987 map
988}
989
990fn topological_sort(nodes: &[NodeRow]) -> Vec<&NodeRow> {
991 let by_alias: HashMap<&str, &NodeRow> = nodes.iter().map(|n| (n.alias.as_str(), n)).collect();
992 let mut visited: HashSet<&str> = HashSet::new();
993 let mut sorted: Vec<&NodeRow> = Vec::new();
994
995 fn visit<'a>(
996 alias: &'a str,
997 by_alias: &HashMap<&str, &'a NodeRow>,
998 visited: &mut HashSet<&'a str>,
999 sorted: &mut Vec<&'a NodeRow>,
1000 ) {
1001 if visited.contains(alias) {
1002 return;
1003 }
1004 visited.insert(alias);
1005 if let Some(node) = by_alias.get(alias) {
1006 if let Some(ref pa) = node.parent_alias {
1007 if by_alias.contains_key(pa.as_str()) {
1008 visit(pa, by_alias, visited, sorted);
1009 }
1010 }
1011 sorted.push(node);
1012 }
1013 }
1014
1015 for node in nodes {
1016 visit(&node.alias, &by_alias, &mut visited, &mut sorted);
1017 }
1018 sorted
1019}
1020
#[cfg(test)]
mod tests {
    use super::*;

    // graph.csv fixture: header plus the single model row.
    const GRAPH_CSV: &str = r#"name,ontology_class,author,description,is_resource
Heritage Monument,http://www.cidoc-crm.org/cidoc-crm/E24_Physical_Human-Made_Thing,,A heritage monument,true"#;

    // nodes.csv fixture: three root-level nodes, one of which (`location`) has
    // two children.
    const NODES_CSV: &str = r#"parent_alias,alias,name,datatype,cardinality,ontology_class,parent_property,description,collection_name,required,searchable,exportable,sortorder
,name,Name,string,1,http://www.cidoc-crm.org/cidoc-crm/E41_Appellation,http://www.cidoc-crm.org/cidoc-crm/P1_is_identified_by,Primary name,,true,true,true,1
,monument_type,Monument Type,concept,1,http://www.cidoc-crm.org/cidoc-crm/E55_Type,http://www.cidoc-crm.org/cidoc-crm/P2_has_type,Type classification,Monument Types,true,true,true,2
,location,Location,semantic,n,http://www.cidoc-crm.org/cidoc-crm/E53_Place,http://www.cidoc-crm.org/cidoc-crm/P53_has_former_or_current_location,,,false,true,true,3
location,place_name,Place Name,string,1,http://www.cidoc-crm.org/cidoc-crm/E44_Place_Appellation,http://www.cidoc-crm.org/cidoc-crm/P87_is_identified_by,,,true,true,true,1
location,geometry,Geometry,geojson-feature-collection,1,http://www.cidoc-crm.org/cidoc-crm/E94_Space_Primitive,http://www.cidoc-crm.org/cidoc-crm/P168_place_is_defined_by,,,false,false,true,2"#;

    // collections.csv fixture: one collection with four top-level concepts and
    // one child (`Motte` under `Castle`).
    const COLLECTIONS_CSV: &str = r#"collection_name,concept_label,parent_label,sort_order
Monument Types,Castle,,1
Monument Types,Church,,2
Monument Types,Bridge,,3
Monument Types,Fortification,,4
Monument Types,Motte,Castle,5"#;

    // The fixtures parse without errors and pass validation without errors.
    #[test]
    fn test_parse_and_validate() {
        let (bundle, parse_diags) =
            parse_model_csvs(GRAPH_CSV, NODES_CSV, Some(COLLECTIONS_CSV)).unwrap();
        assert!(parse_diags
            .iter()
            .all(|d| d.level != DiagnosticLevel::Error));
        assert_eq!(bundle.graph.name, "Heritage Monument");
        assert_eq!(bundle.nodes.len(), 5);
        assert_eq!(bundle.collections.len(), 5);

        let validation = validate_model_csvs(&bundle);
        let errors: Vec<_> = validation
            .iter()
            .filter(|d| d.level == DiagnosticLevel::Error)
            .collect();
        assert!(errors.is_empty(), "Unexpected errors: {:?}", errors);
    }

    // Namespace string handed to `parse_rdm_namespace` by the tests below.
    const TEST_NAMESPACE: &str = "http://test.example.org/rdm/";

    // Instructions start with `create_model` and list parents before children.
    #[test]
    fn test_to_instructions() {
        let (bundle, _) = parse_model_csvs(GRAPH_CSV, NODES_CSV, Some(COLLECTIONS_CSV)).unwrap();
        let instructions = model_csvs_to_instructions(&bundle, TEST_NAMESPACE).unwrap();

        assert_eq!(instructions[0].action, "create_model");
        // One `create_model` plus one `add_node` per row of NODES_CSV.
        assert_eq!(instructions.len(), 6);
        let aliases: Vec<&str> = instructions
            .iter()
            .filter(|i| i.action == "add_node")
            .map(|i| i.object.as_str())
            .collect();
        let loc_idx = aliases.iter().position(|a| *a == "location").unwrap();
        let pn_idx = aliases.iter().position(|a| *a == "place_name").unwrap();
        let geo_idx = aliases.iter().position(|a| *a == "geometry").unwrap();
        assert!(loc_idx < pn_idx);
        assert!(loc_idx < geo_idx);
    }

    // All five concepts are indexed; only the four without a parent are top-level.
    #[test]
    fn test_to_collections() {
        let (bundle, _) = parse_model_csvs(GRAPH_CSV, NODES_CSV, Some(COLLECTIONS_CSV)).unwrap();
        let collections = model_csvs_to_collections(&bundle, TEST_NAMESPACE).unwrap();

        assert_eq!(collections.len(), 1);
        let coll = &collections[0];
        assert_eq!(coll.all_concepts.len(), 5);
        assert_eq!(coll.concepts.len(), 4);
    }

    // End-to-end build from the fixtures.
    #[test]
    fn test_build_graph() {
        let result = build_graph_from_model_csvs(
            GRAPH_CSV,
            NODES_CSV,
            Some(COLLECTIONS_CSV),
            TEST_NAMESPACE,
            MutatorOptions::default(),
        );
        let (graph, collections) = result.unwrap();
        // Presumably the model root plus the five CSV nodes, joined by one edge
        // per CSV node -- the counts are produced by `graph_mutator`; confirm there.
        assert_eq!(graph.nodes.len(), 6);
        assert_eq!(graph.edges.len(), 5);
        assert_eq!(collections.len(), 1);
    }

    // A parent_alias that names no node is reported as an error.
    #[test]
    fn test_dangling_parent_alias() {
        let bad_nodes = r#"parent_alias,alias,name,datatype,cardinality,ontology_class,parent_property
nonexistent,child,Child,string,1,http://www.cidoc-crm.org/cidoc-crm/E62_String,http://www.cidoc-crm.org/cidoc-crm/P3_has_note"#;
        let diags = validate_model_csvs_from_strings(GRAPH_CSV, bad_nodes, None);
        assert!(diags
            .iter()
            .any(|d| d.message.contains("nonexistent") && d.level == DiagnosticLevel::Error));
    }

    // A referenced but undefined collection only produces a warning.
    #[test]
    fn test_missing_collection() {
        let nodes_with_concept = r#"parent_alias,alias,name,datatype,cardinality,ontology_class,parent_property,collection_name
,my_type,Type,concept,1,http://www.cidoc-crm.org/cidoc-crm/E55_Type,http://www.cidoc-crm.org/cidoc-crm/P2_has_type,Missing Collection"#;
        let diags = validate_model_csvs_from_strings(GRAPH_CSV, nodes_with_concept, None);
        assert!(diags.iter().any(
            |d| d.message.contains("Missing Collection") && d.level == DiagnosticLevel::Warning
        ));
    }
}