Skip to main content

alizarin_core/
exporter.rs

//! Prebuild directory exporter.
//!
//! The converse of [`PrebuildLoader`](crate::loader::PrebuildLoader). Builds
//! export data structures from registered graphs and RDM collections, then
//! optionally writes them to the Arches "pkg" directory structure.
//!
//! Two-layer design:
//! - **Data-building layer** (platform-agnostic): produces [`PrebuildExportData`]
//! - **Directory-writing layer** (`fs_writer`, not available on WASM): writes to disk

11use std::collections::HashSet;
12
13use crate::rdm_cache::{rdm_to_skos_collection_excluding, RdmCache, RdmCollection};
14use crate::registry::{get_graph, get_registered_graph_ids};
15use crate::skos::collection_to_skos_xml;
16use crate::string_utils::sort_json_keys;
17use crate::StaticGraph;
18
19// ============================================================================
20// Types
21// ============================================================================
22
/// A single file to be written, as a relative path plus its content string.
///
/// Derives `PartialEq`/`Eq` so exported files can be compared directly, for
/// example in assertions or when checking two exports for equality.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ExportFile {
    /// Relative path within the pkg directory (e.g. "graphs/resource_models/MyModel.json")
    pub relative_path: String,
    /// File content (JSON or XML)
    pub content: String,
}
31
32/// Complete export data, ready to be written to a directory.
33///
34/// This is the platform-agnostic output of the data-building layer.
35/// WASM consumers use this directly; NAPI/Python consumers pass it
36/// to [`fs_writer::write_to_directory`].
37#[derive(Debug, Clone, Default)]
38pub struct PrebuildExportData {
39    pub graph_files: Vec<ExportFile>,
40    pub reference_data_files: Vec<ExportFile>,
41    pub business_data_files: Vec<ExportFile>,
42}
43
44impl PrebuildExportData {
45    /// Get all files as a flat list.
46    pub fn all_files(&self) -> Vec<&ExportFile> {
47        let mut files = Vec::new();
48        files.extend(self.graph_files.iter());
49        files.extend(self.reference_data_files.iter());
50        files.extend(self.business_data_files.iter());
51        files
52    }
53
54    /// Total number of files.
55    pub fn file_count(&self) -> usize {
56        self.graph_files.len() + self.reference_data_files.len() + self.business_data_files.len()
57    }
58}
59
/// Failures that export operations can report.
#[derive(Debug)]
pub enum ExportError {
    /// A value could not be serialized; carries a human-readable message.
    SerializationError(String),
    /// A registry lookup failed; carries a human-readable message.
    RegistryError(String),
    /// An underlying I/O operation failed.
    IoError(std::io::Error),
}
67
68impl std::fmt::Display for ExportError {
69    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
70        match self {
71            ExportError::SerializationError(s) => write!(f, "Serialization error: {}", s),
72            ExportError::RegistryError(s) => write!(f, "Registry error: {}", s),
73            ExportError::IoError(e) => write!(f, "IO error: {}", e),
74        }
75    }
76}
77
78impl std::error::Error for ExportError {}
79
80impl From<std::io::Error> for ExportError {
81    fn from(e: std::io::Error) -> Self {
82        ExportError::IoError(e)
83    }
84}
85
86impl From<serde_json::Error> for ExportError {
87    fn from(e: serde_json::Error) -> Self {
88        ExportError::SerializationError(e.to_string())
89    }
90}
91
92// ============================================================================
93// Data-building layer (platform-agnostic)
94// ============================================================================
95
96/// Wrap a graph in the Arches export format `{"graph": [<graph_data>]}` with
97/// deterministically sorted keys.
98fn wrap_graph_json(graph: &StaticGraph) -> Result<serde_json::Value, ExportError> {
99    let graph_value = serde_json::to_value(graph).map_err(|e| {
100        ExportError::SerializationError(format!(
101            "Failed to serialize graph {}: {}",
102            graph.graphid, e
103        ))
104    })?;
105    let sorted = sort_json_keys(graph_value);
106    Ok(serde_json::json!({ "graph": [sorted] }))
107}
108
109/// Determine the filename for a graph based on its display name.
110///
111/// Uses the English name (or graphid as fallback), sanitized for filesystem use.
112/// Matches the behaviour of the quartz-graphs `run.py` exporter.
113fn graph_filename(graph: &StaticGraph) -> String {
114    let name = graph.display_name();
115    let name = if name.is_empty() {
116        graph.graphid.clone()
117    } else {
118        name
119    };
120    let sanitized = name.replace('/', "_");
121    format!("{}.json", sanitized.trim())
122}
123
124/// Export specific graphs by ID from the global registry.
125///
126/// Classifies each as `resource_models` or `branches` based on `isresource`,
127/// wraps as `{"graph": [sorted_data]}`.
128pub fn export_graphs(graph_ids: &[String]) -> Result<Vec<ExportFile>, ExportError> {
129    let mut files = Vec::new();
130
131    for graph_id in graph_ids {
132        let graph = get_graph(graph_id).ok_or_else(|| {
133            ExportError::RegistryError(format!("Graph '{}' not registered", graph_id))
134        })?;
135
136        let subdir = if graph.isresource.unwrap_or(false) {
137            "resource_models"
138        } else {
139            "branches"
140        };
141
142        let filename = graph_filename(&graph);
143        let wrapped = wrap_graph_json(&graph)?;
144        let content = serde_json::to_string_pretty(&wrapped)?;
145
146        files.push(ExportFile {
147            relative_path: format!("graphs/{}/{}", subdir, filename),
148            content,
149        });
150    }
151
152    Ok(files)
153}
154
155/// Export all registered graphs.
156pub fn export_all_graphs() -> Result<Vec<ExportFile>, ExportError> {
157    let ids = get_registered_graph_ids();
158    export_graphs(&ids)
159}
160
161/// Export RDM collections as SKOS ConceptScheme XML files.
162///
163/// Iterates all collections in the cache, converts each to SKOS XML
164/// as a ConceptScheme and writes to `reference_data/controlled_lists/`.
165/// This format is compatible with the `arches_controlled_lists` importer.
166///
167/// Concepts that appear in multiple collections are only emitted once (in the
168/// first collection encountered, sorted by collection ID). This prevents
169/// duplicate-key errors when the CLM importer creates `ListItem` rows.
170pub fn export_collections(
171    rdm_cache: &RdmCache,
172    base_uri: &str,
173) -> Result<Vec<ExportFile>, ExportError> {
174    let mut files = Vec::new();
175
176    let mut collection_ids = rdm_cache.get_collection_ids();
177    collection_ids.sort();
178
179    for collection_id in &collection_ids {
180        if let Some(collection) = rdm_cache.get_collection(collection_id) {
181            let file = export_single_collection(collection, base_uri, "ConceptScheme")?;
182            files.push(file);
183        }
184    }
185
186    Ok(files)
187}
188
189/// Export a single RDM collection as SKOS XML.
190///
191/// Callers that need per-collection control over `node_type` can use this
192/// instead of [`export_collections`].
193pub fn export_single_collection(
194    collection: &RdmCollection,
195    base_uri: &str,
196    node_type: &str,
197) -> Result<ExportFile, ExportError> {
198    let skos = rdm_to_skos_collection_excluding(collection, node_type, &HashSet::new());
199    let xml = collection_to_skos_xml(&skos, base_uri);
200
201    Ok(ExportFile {
202        relative_path: format!("reference_data/controlled_lists/{}.xml", collection.id),
203        content: xml,
204    })
205}
206
207/// Build a complete prebuild export from registered graphs and an RDM cache.
208///
209/// This is the main entry point for the data-building layer.
210///
211/// - `graph_ids`: if `Some`, export only these graphs; if `None`, export all registered graphs.
212/// - `rdm_cache`: if `Some`, export collections as SKOS XML.
213/// - `base_uri`: base URI for SKOS resources.
214pub fn build_prebuild_export(
215    graph_ids: Option<&[String]>,
216    rdm_cache: Option<&RdmCache>,
217    base_uri: &str,
218) -> Result<PrebuildExportData, ExportError> {
219    let graph_files = match graph_ids {
220        Some(ids) => export_graphs(ids)?,
221        None => export_all_graphs()?,
222    };
223
224    let reference_data_files = match rdm_cache {
225        Some(cache) => export_collections(cache, base_uri)?,
226        None => Vec::new(),
227    };
228
229    Ok(PrebuildExportData {
230        graph_files,
231        reference_data_files,
232        ..Default::default()
233    })
234}
235
236// ============================================================================
237// Directory-writing layer (filesystem, not available in WASM)
238// ============================================================================
239
240#[cfg(not(target_arch = "wasm32"))]
241pub mod fs_writer {
242    use super::*;
243    use std::fs;
244    use std::path::Path;
245
246    /// Write [`PrebuildExportData`] to a directory on disk.
247    ///
248    /// Creates subdirectories as needed. Existing files with the same
249    /// names are overwritten.
250    ///
251    /// Returns the list of absolute paths written.
252    pub fn write_to_directory(
253        data: &PrebuildExportData,
254        out_dir: &Path,
255    ) -> Result<Vec<String>, ExportError> {
256        let mut written = Vec::new();
257
258        for file in data.all_files() {
259            let full_path = out_dir.join(&file.relative_path);
260
261            if let Some(parent) = full_path.parent() {
262                fs::create_dir_all(parent)?;
263            }
264
265            fs::write(&full_path, &file.content)?;
266            written.push(full_path.display().to_string());
267        }
268
269        Ok(written)
270    }
271
272    /// Convenience: build export data and write to directory in one step.
273    pub fn export_prebuild_to_directory(
274        graph_ids: Option<&[String]>,
275        rdm_cache: Option<&RdmCache>,
276        base_uri: &str,
277        out_dir: &Path,
278    ) -> Result<Vec<String>, ExportError> {
279        let data = super::build_prebuild_export(graph_ids, rdm_cache, base_uri)?;
280        write_to_directory(&data, out_dir)
281    }
282}
283
284// ============================================================================
285// Tests
286// ============================================================================
287
#[cfg(test)]
mod tests {
    use super::*;
    use crate::registry::{register_graph_owned, unregister_graph};

    /// Build a minimal graph (a single semantic root node) with the given ID
    /// and `isresource` flag. The English name is `"Test <id>"`.
    fn test_graph(id: &str, is_resource: bool) -> StaticGraph {
        let json = format!(
            r#"{{
                "graphid": "{id}",
                "name": {{"en": "Test {id}"}},
                "isresource": {flag},
                "nodes": [{{
                    "nodeid": "root",
                    "name": "Root",
                    "datatype": "semantic",
                    "graph_id": "{id}"
                }}],
                "root": {{
                    "nodeid": "root",
                    "name": "Root",
                    "datatype": "semantic",
                    "graph_id": "{id}"
                }}
            }}"#,
            id = id,
            flag = is_resource
        );
        StaticGraph::from_json_string(&json).expect("Failed to create test graph")
    }

    /// Resource models and branches land in their own subdirectories, and each
    /// file uses the `{"graph": [...]}` wrapper.
    #[test]
    fn test_export_graphs_classification() {
        register_graph_owned(test_graph("exporter-rm-1", true));
        register_graph_owned(test_graph("exporter-branch-1", false));

        let ids = ["exporter-rm-1".to_string(), "exporter-branch-1".to_string()];
        let files = export_graphs(&ids).unwrap();
        assert_eq!(files.len(), 2);

        // (subdirectory expected in the path, graph ID expected in the content)
        let expectations = [
            ("resource_models", "exporter-rm-1"),
            ("branches", "exporter-branch-1"),
        ];
        for &(subdir, expected_id) in expectations.iter() {
            let file = files
                .iter()
                .find(|f| f.relative_path.contains(subdir))
                .unwrap();

            // Verify wrapped format
            let parsed: serde_json::Value = serde_json::from_str(&file.content).unwrap();
            assert!(parsed["graph"].is_array());
            assert_eq!(parsed["graph"][0]["graphid"], expected_id);
        }

        unregister_graph("exporter-rm-1");
        unregister_graph("exporter-branch-1");
    }

    /// Exporting the same graph twice yields byte-identical content.
    #[test]
    fn test_deterministic_output() {
        register_graph_owned(test_graph("exporter-det-1", true));

        let ids = ["exporter-det-1".to_string()];
        let first = export_graphs(&ids).unwrap();
        let second = export_graphs(&ids).unwrap();

        assert_eq!(first[0].content, second[0].content);

        unregister_graph("exporter-det-1");
    }

    /// A one-collection cache produces one XML file in the controlled-lists
    /// directory containing the concept label.
    #[test]
    fn test_export_collections() {
        let mut cache = RdmCache::new();
        cache
            .add_collection_from_json(
                "exporter-coll-1",
                r#"[{"id": "c1", "prefLabel": {"en": "Concept One"}}]"#,
            )
            .unwrap();

        let files = export_collections(&cache, "http://example.org/").unwrap();
        assert_eq!(files.len(), 1);

        let only = &files[0];
        assert!(only
            .relative_path
            .contains("reference_data/controlled_lists/"));
        assert!(only.content.contains("xml"));
        assert!(only.content.contains("Concept One"));
    }

    /// Asking for an unregistered graph is an error.
    #[test]
    fn test_export_missing_graph() {
        let ids = ["nonexistent-exporter-test".to_string()];
        assert!(export_graphs(&ids).is_err());
    }

    /// The combined entry point returns one graph file, one reference-data
    /// file and no business data.
    #[test]
    fn test_build_prebuild_export() {
        register_graph_owned(test_graph("exporter-full-1", true));

        let mut cache = RdmCache::new();
        cache
            .add_collection_from_json(
                "exporter-coll-2",
                r#"[{"id": "c2", "prefLabel": {"en": "Test Concept"}}]"#,
            )
            .unwrap();

        let ids = vec!["exporter-full-1".to_string()];
        let data =
            build_prebuild_export(Some(ids.as_slice()), Some(&cache), "http://example.org/")
                .unwrap();

        assert_eq!(data.graph_files.len(), 1);
        assert_eq!(data.reference_data_files.len(), 1);
        assert_eq!(data.business_data_files.len(), 0);
        assert_eq!(data.file_count(), 2);
        assert_eq!(data.all_files().len(), 2);

        unregister_graph("exporter-full-1");
    }

    /// The exported format can be parsed back by `from_json_string`.
    #[test]
    fn test_graph_round_trip() {
        register_graph_owned(test_graph("exporter-roundtrip-1", true));

        let ids = ["exporter-roundtrip-1".to_string()];
        let files = export_graphs(&ids).unwrap();
        let reparsed = StaticGraph::from_json_string(&files[0].content).unwrap();
        assert_eq!(reparsed.graphid, "exporter-roundtrip-1");

        unregister_graph("exporter-roundtrip-1");
    }
}
429
#[cfg(test)]
#[cfg(not(target_arch = "wasm32"))]
mod fs_tests {
    use super::*;
    use std::path::PathBuf;

    /// Writing a single graph file creates it under the output directory.
    #[test]
    fn test_write_to_directory() {
        let graph_file = ExportFile {
            relative_path: "graphs/resource_models/Test.json".into(),
            content: r#"{"graph": [{}]}"#.into(),
        };
        let data = PrebuildExportData {
            graph_files: vec![graph_file],
            ..Default::default()
        };

        // Start from a clean directory; it may not exist, so ignore the result.
        let out_dir = std::env::temp_dir().join("alizarin_exporter_test");
        let _ = std::fs::remove_dir_all(&out_dir);

        let written = fs_writer::write_to_directory(&data, &out_dir).unwrap();
        assert_eq!(written.len(), 1);
        assert!(PathBuf::from(&written[0]).exists());

        // Best-effort cleanup.
        let _ = std::fs::remove_dir_all(&out_dir);
    }
}
455}