use crate::graph::{
IndexedGraph, StaticGraph, StaticResource, StaticResourceDescriptors, StaticResourceMetadata,
StaticResourceSummary, StaticTile,
};
use crate::ontology::{OntologyConfig, OntologyValidator};
use crate::skos::{parse_skos_to_collections, SkosCollection};
#[cfg(feature = "parallel")]
use rayon::prelude::*;
use serde::Deserialize;
use std::collections::HashMap;
use std::fs;
use std::path::{Path, PathBuf};
use std::sync::mpsc::Sender;
/// Top-level wrapper deserialized from a business-data JSON file when only
/// resource summaries are needed (no tiles).
#[derive(Debug, Deserialize)]
struct BusinessDataFile {
/// Value of the file's `business_data` key.
business_data: BusinessDataContent,
}
/// Body of the `business_data` key in the summary-oriented schema.
#[derive(Debug, Deserialize)]
struct BusinessDataContent {
/// Resources listed in the file; an absent `resources` key yields an empty list.
#[serde(default)]
resources: Vec<BusinessDataResource>,
}
/// One resource entry, reduced to the fields that feed a `StaticResourceSummary`.
#[derive(Debug, Deserialize)]
struct BusinessDataResource {
/// Identity and bookkeeping fields of the resource.
resourceinstance: BusinessDataResourceInstance,
/// Optional string-to-string metadata; `to_summary` substitutes an empty map when missing.
#[serde(default)]
metadata: Option<HashMap<String, String>>,
}
/// The `resourceinstance` object of a resource as read for summaries.
///
/// `resourceinstanceid`, `graph_id` and `name` are required; every other field
/// is optional and deserializes to `None` when absent.
#[derive(Debug, Deserialize)]
struct BusinessDataResourceInstance {
/// Identifier of the resource instance.
resourceinstanceid: String,
/// Identifier of the graph this resource belongs to; used for filtering.
graph_id: String,
/// Name of the resource as stored in the file.
name: String,
/// Descriptors in either of the layouts accepted by `FlexibleDescriptors`.
#[serde(default)]
descriptors: Option<FlexibleDescriptors>,
/// Creation timestamp, kept as the raw string from the file.
#[serde(default)]
createdtime: Option<String>,
/// Last-modified timestamp, kept as the raw string from the file.
#[serde(default)]
lastmodified: Option<String>,
/// Optional `publication_id` value, copied through unchanged.
#[serde(default)]
publication_id: Option<String>,
/// Optional `principaluser_id` value, copied through unchanged.
#[serde(default)]
principaluser_id: Option<i32>,
/// Optional `legacyid` value, copied through unchanged.
#[serde(default)]
legacyid: Option<String>,
/// Optional `graph_publication_id` value, copied through unchanged.
#[serde(default)]
graph_publication_id: Option<String>,
}
/// Descriptors accepted in either a flat layout or a per-language map and
/// collapsed into a single `StaticResourceDescriptors` while deserializing.
#[derive(Debug)]
struct FlexibleDescriptors {
/// The descriptor set selected by the custom `Deserialize` implementation.
resolved: StaticResourceDescriptors,
}
impl FlexibleDescriptors {
    /// Returns a copy of the resolved descriptors, or `None` when they are empty.
    ///
    /// The `_lang` argument is ignored: the language was already chosen while
    /// deserializing, so every language receives the same answer.
    fn get_for_lang(&self, _lang: &str) -> Option<StaticResourceDescriptors> {
        let has_content = !self.resolved.is_empty();
        has_content.then(|| self.resolved.clone())
    }
}
impl<'de> Deserialize<'de> for FlexibleDescriptors {
    /// Accepts descriptors in two layouts and resolves them to a single value.
    ///
    /// 1. A flat `StaticResourceDescriptors` object is used directly when it
    ///    parses and is non-empty.
    /// 2. Otherwise a map of language code to `StaticResourceDescriptors` is
    ///    tried: the `"en"` entry wins, and if it is missing the entry with the
    ///    lexicographically smallest language key is used.
    /// 3. Anything else resolves to the default (empty) descriptors; this
    ///    implementation never fails because of the descriptor shape.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        let value = serde_json::Value::deserialize(deserializer)?;

        // The flat attempt consumes its input, hence the clone: the value is
        // still needed for the nested attempt below.
        if let Ok(flat) = serde_json::from_value::<StaticResourceDescriptors>(value.clone()) {
            if !flat.is_empty() {
                return Ok(FlexibleDescriptors { resolved: flat });
            }
        }

        if let Ok(nested) =
            serde_json::from_value::<HashMap<String, StaticResourceDescriptors>>(value)
        {
            let resolved = nested
                .get("en")
                .or_else(|| {
                    // HashMap iteration order is arbitrary, so pick the fallback
                    // language by key to keep the result reproducible across runs.
                    nested
                        .iter()
                        .min_by(|a, b| a.0.cmp(b.0))
                        .map(|(_, descriptors)| descriptors)
                })
                .cloned()
                .unwrap_or_default();
            return Ok(FlexibleDescriptors { resolved });
        }

        Ok(FlexibleDescriptors {
            resolved: StaticResourceDescriptors::default(),
        })
    }
}
impl BusinessDataResource {
    /// Builds a `StaticResourceSummary` by cloning the fields of this resource.
    ///
    /// Descriptors are `None` when absent or empty; missing metadata becomes
    /// an empty map.
    fn to_summary(&self) -> StaticResourceSummary {
        let instance = &self.resourceinstance;
        let descriptors = match &instance.descriptors {
            Some(flexible) => flexible.get_for_lang("en"),
            None => None,
        };
        let metadata = self.metadata.clone().unwrap_or_default();

        StaticResourceSummary {
            resourceinstanceid: instance.resourceinstanceid.clone(),
            graph_id: instance.graph_id.clone(),
            name: instance.name.clone(),
            descriptors,
            metadata,
            createdtime: instance.createdtime.clone(),
            lastmodified: instance.lastmodified.clone(),
            publication_id: instance.publication_id.clone(),
            principaluser_id: instance.principaluser_id,
            legacyid: instance.legacyid.clone(),
            graph_publication_id: instance.graph_publication_id.clone(),
        }
    }
}
/// Top-level wrapper of a business-data file in the reduced schema used for
/// counting, which keeps only each resource's `graph_id`.
#[derive(Debug, Deserialize)]
struct BusinessDataFileCount {
/// Value of the file's `business_data` key.
business_data: BusinessDataContentCount,
}
/// Body of the `business_data` key in the counting schema.
#[derive(Debug, Deserialize)]
struct BusinessDataContentCount {
/// Resources listed in the file; an absent `resources` key yields an empty list.
#[serde(default)]
resources: Vec<BusinessDataResourceCount>,
}
/// One resource entry in the counting schema.
#[derive(Debug, Deserialize)]
struct BusinessDataResourceCount {
/// The resource's `resourceinstance` object, reduced to its graph id.
resourceinstance: BusinessDataResourceInstanceCount,
}
/// The only part of `resourceinstance` that counting needs.
#[derive(Debug, Deserialize)]
struct BusinessDataResourceInstanceCount {
/// Identifier of the graph the resource belongs to.
graph_id: String,
}
/// Top-level wrapper of a business-data file in the full schema, which
/// includes tiles and the `__cache` / `__scopes` payloads.
#[derive(Debug, Deserialize)]
struct BusinessDataFileFull {
/// Value of the file's `business_data` key.
business_data: BusinessDataContentFull,
}
/// Body of the `business_data` key in the full schema.
#[derive(Debug, Deserialize)]
struct BusinessDataContentFull {
/// Resources listed in the file; an absent `resources` key yields an empty list.
#[serde(default)]
resources: Vec<BusinessDataResourceFull>,
}
/// One complete resource entry; converted to `StaticResource` by
/// `to_static_resource`.
#[derive(Debug, Deserialize)]
struct BusinessDataResourceFull {
/// Identity and bookkeeping fields of the resource.
resourceinstance: BusinessDataResourceInstanceFull,
/// Tiles of the resource, if the file provides them.
#[serde(default)]
tiles: Option<Vec<StaticTile>>,
/// Optional string-to-string metadata; converted to an empty map when missing.
#[serde(default)]
metadata: Option<HashMap<String, String>>,
/// Raw JSON stored under the `__cache` key, passed through untouched.
#[serde(default, rename = "__cache")]
cache: Option<serde_json::Value>,
/// Raw JSON stored under the `__scopes` key, passed through untouched.
#[serde(default, rename = "__scopes")]
scopes: Option<serde_json::Value>,
}
/// The `resourceinstance` object of a resource as read for full loading.
///
/// `resourceinstanceid`, `graph_id` and `name` are required; every other field
/// is optional and deserializes to `None` when absent.
#[derive(Debug, Deserialize)]
struct BusinessDataResourceInstanceFull {
/// Identifier of the resource instance; used for lookups by id.
resourceinstanceid: String,
/// Identifier of the graph this resource belongs to; used for filtering.
graph_id: String,
/// Name of the resource as stored in the file.
name: String,
/// Descriptors in either of the layouts accepted by `FlexibleDescriptors`.
#[serde(default)]
descriptors: Option<FlexibleDescriptors>,
/// Creation timestamp, kept as the raw string from the file.
#[serde(default)]
createdtime: Option<String>,
/// Last-modified timestamp, kept as the raw string from the file.
#[serde(default)]
lastmodified: Option<String>,
/// Optional `publication_id` value, copied through unchanged.
#[serde(default)]
publication_id: Option<String>,
/// Optional `principaluser_id` value, copied through unchanged.
#[serde(default)]
principaluser_id: Option<i32>,
/// Optional `legacyid` value, copied through unchanged.
#[serde(default)]
legacyid: Option<String>,
/// Optional `graph_publication_id` value, copied through unchanged.
#[serde(default)]
graph_publication_id: Option<String>,
}
impl BusinessDataResourceFull {
fn to_static_resource(&self) -> StaticResource {
let ri = &self.resourceinstance;
let descriptors = ri
.descriptors
.as_ref()
.and_then(|d| d.get_for_lang("en"))
.unwrap_or_default();
StaticResource {
resourceinstance: StaticResourceMetadata {
resourceinstanceid: ri.resourceinstanceid.clone(),
graph_id: ri.graph_id.clone(),
name: ri.name.clone(),
descriptors,
createdtime: ri.createdtime.clone(),
lastmodified: ri.lastmodified.clone(),
publication_id: ri.publication_id.clone(),
principaluser_id: ri.principaluser_id,
legacyid: ri.legacyid.clone(),
graph_publication_id: ri.graph_publication_id.clone(),
},
tiles: self.tiles.clone(),
metadata: self.metadata.clone().unwrap_or_default(),
cache: self.cache.clone(),
scopes: self.scopes.clone(),
tiles_loaded: Some(true),
}
}
}
/// Errors produced while locating, reading or parsing prebuild data.
#[derive(Debug)]
pub enum LoaderError {
/// A filesystem operation failed.
IoError(std::io::Error),
/// JSON deserialization failed.
JsonError(serde_json::Error),
/// A graph or ontology validator could not be built; carries the message.
GraphError(String),
/// A required path or resource does not exist.
NotFound(String),
/// Any other failure, described by its message.
Other(String),
}
impl std::fmt::Display for LoaderError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
LoaderError::IoError(e) => write!(f, "IO error: {}", e),
LoaderError::JsonError(e) => write!(f, "JSON error: {}", e),
LoaderError::GraphError(s) => write!(f, "Graph error: {}", s),
LoaderError::NotFound(s) => write!(f, "Not found: {}", s),
LoaderError::Other(s) => write!(f, "{}", s),
}
}
}
// Marker impl so `LoaderError` can be used wherever `std::error::Error` is
// expected; no `source()` override is provided.
impl std::error::Error for LoaderError {}
impl From<std::io::Error> for LoaderError {
fn from(e: std::io::Error) -> Self {
LoaderError::IoError(e)
}
}
impl From<serde_json::Error> for LoaderError {
fn from(e: serde_json::Error) -> Self {
LoaderError::JsonError(e)
}
}
/// Snapshot of what a prebuild directory contains, as reported by
/// `PrebuildLoader::get_info`.
#[derive(Debug, Clone)]
pub struct PrebuildInfo {
/// Root path of the prebuild directory.
pub path: PathBuf,
/// `true` when at least one graph JSON file was found.
pub has_graphs: bool,
/// `true` when `<root>/business_data` exists.
pub has_business_data: bool,
/// `true` when `<root>/reference_data` exists.
pub has_reference_data: bool,
/// `true` when `<root>/indexTemplates` exists.
pub has_index_templates: bool,
/// `true` when `<root>/ontologies` exists.
pub has_ontologies: bool,
/// Graph JSON files found under `graphs/resource_models` and `graphs/branches`.
pub graph_files: Vec<PathBuf>,
}
/// Reads graphs, collections, ontologies, business data and preindex files
/// from a prebuild directory tree.
pub struct PrebuildLoader {
/// Root of the prebuild tree; all lookups are relative to this path.
root_path: PathBuf,
}
impl PrebuildLoader {
pub fn new<P: AsRef<Path>>(path: P) -> Result<Self, LoaderError> {
let root_path = path.as_ref().to_path_buf();
if !root_path.exists() {
return Err(LoaderError::NotFound(format!(
"Prebuild directory not found: {}",
root_path.display()
)));
}
Ok(PrebuildLoader { root_path })
}
/// Inspects the prebuild root and reports which well-known subdirectories
/// exist and which graph files are available.
///
/// # Errors
/// Propagates I/O errors raised while listing the graph directories.
pub fn get_info(&self) -> Result<PrebuildInfo, LoaderError> {
    let present = |name: &str| self.root_path.join(name).exists();

    let graphs_dir = self.root_path.join("graphs");
    let graph_files = match graphs_dir.exists() {
        true => self.find_graph_files(&graphs_dir)?,
        false => Vec::new(),
    };

    Ok(PrebuildInfo {
        path: self.root_path.clone(),
        // Graphs count as present only when at least one file was found.
        has_graphs: !graph_files.is_empty(),
        has_business_data: present("business_data"),
        has_reference_data: present("reference_data"),
        has_index_templates: present("indexTemplates"),
        has_ontologies: present("ontologies"),
        graph_files,
    })
}
/// Lists the `.json` files directly inside `resource_models` and then
/// `branches` under `graphs_dir`; missing subdirectories are skipped.
///
/// # Errors
/// Propagates I/O errors from reading either directory.
fn find_graph_files(&self, graphs_dir: &Path) -> Result<Vec<PathBuf>, LoaderError> {
    let mut found = Vec::new();
    for subdir in &["resource_models", "branches"] {
        let dir = graphs_dir.join(subdir);
        if !dir.exists() {
            continue;
        }
        for entry in fs::read_dir(&dir)? {
            let candidate = entry?.path();
            if candidate.extension().map_or(false, |ext| ext == "json") {
                found.push(candidate);
            }
        }
    }
    Ok(found)
}
/// Reads the file at `path` and parses it with `StaticGraph::from_json_string`.
///
/// # Errors
/// Returns `IoError` when the file cannot be read and `GraphError` when
/// parsing fails.
pub fn load_graph<P: AsRef<Path>>(&self, path: P) -> Result<StaticGraph, LoaderError> {
    let json = fs::read_to_string(path.as_ref())?;
    match StaticGraph::from_json_string(&json) {
        Ok(graph) => Ok(graph),
        Err(message) => Err(LoaderError::GraphError(message)),
    }
}
/// Loads the graph at `path` and wraps it in an `IndexedGraph`.
///
/// # Errors
/// Same as [`PrebuildLoader::load_graph`].
pub fn load_indexed_graph<P: AsRef<Path>>(&self, path: P) -> Result<IndexedGraph, LoaderError> {
    self.load_graph(path).map(IndexedGraph::new)
}
/// Loads every graph file reported by `get_info`.
///
/// Files that fail to load are skipped with a warning on stderr rather than
/// aborting the whole operation.
///
/// # Errors
/// Only fails when the graph directories themselves cannot be listed.
pub fn load_all_graphs(&self) -> Result<Vec<StaticGraph>, LoaderError> {
    let info = self.get_info()?;
    let graphs = info
        .graph_files
        .iter()
        .filter_map(|path| match self.load_graph(path) {
            Ok(graph) => Some(graph),
            Err(e) => {
                eprintln!("Warning: Failed to load graph {}: {}", path.display(), e);
                None
            }
        })
        .collect();
    Ok(graphs)
}
/// Loads every graph and wraps each one in an `IndexedGraph`, preserving order.
///
/// # Errors
/// Same as [`PrebuildLoader::load_all_graphs`].
pub fn load_all_indexed_graphs(&self) -> Result<Vec<IndexedGraph>, LoaderError> {
    let mut indexed = Vec::new();
    for graph in self.load_all_graphs()? {
        indexed.push(IndexedGraph::new(graph));
    }
    Ok(indexed)
}
/// Loads every graph and keys it by its `graphid`.
///
/// When two files carry the same `graphid`, the one loaded later replaces the
/// earlier one.
///
/// # Errors
/// Same as [`PrebuildLoader::load_all_graphs`].
pub fn load_graphs_by_id(&self) -> Result<HashMap<String, IndexedGraph>, LoaderError> {
    let mut by_id = HashMap::new();
    for indexed in self.load_all_indexed_graphs()? {
        let id = indexed.graph.graphid.clone();
        by_id.insert(id, indexed);
    }
    Ok(by_id)
}
/// Returns `<root>/<name>` without checking whether it exists.
pub fn get_subdir(&self, name: &str) -> PathBuf {
    let mut subdir = self.root_path.clone();
    subdir.push(name);
    subdir
}
/// Returns the root of the prebuild tree this loader was created with.
pub fn root_path(&self) -> &Path {
    self.root_path.as_path()
}
/// Lists `.xml` and `.json` files directly inside the `concepts`,
/// `collections`, `controlled_lists` and `staging` subdirectories of
/// `<root>/reference_data`.
///
/// The result places all XML files first, then everything else, each group
/// sorted by path. Returns an empty list when `reference_data` is missing.
///
/// # Errors
/// Propagates I/O errors from reading any of the subdirectories.
pub fn find_collection_files(&self) -> Result<Vec<PathBuf>, LoaderError> {
    let reference_data = self.root_path.join("reference_data");
    let mut files = Vec::new();
    if !reference_data.exists() {
        return Ok(files);
    }

    for subdir in &["concepts", "collections", "controlled_lists", "staging"] {
        let dir = reference_data.join(subdir);
        if !dir.is_dir() {
            continue;
        }
        for entry in fs::read_dir(&dir)? {
            let candidate = entry?.path();
            let wanted = matches!(
                candidate.extension().and_then(|e| e.to_str()),
                Some("xml") | Some("json")
            );
            if wanted {
                files.push(candidate);
            }
        }
    }

    // XML before everything else, each group ordered by path.
    let (mut xml_files, mut other_files): (Vec<PathBuf>, Vec<PathBuf>) = files
        .into_iter()
        .partition(|p| p.extension().and_then(|e| e.to_str()) == Some("xml"));
    xml_files.sort();
    other_files.sort();
    xml_files.extend(other_files);
    Ok(xml_files)
}
/// Parses every collection file found by `find_collection_files`.
///
/// XML files go through `parse_skos_to_collections`; JSON files are tried as
/// a single `SkosCollection` and then as an array of them. Files that fail to
/// parse are skipped with a warning on stderr. A collection whose `id` was
/// already seen in an earlier file is dropped, so the first occurrence wins.
///
/// # Errors
/// Propagates I/O errors from listing or reading the files.
pub fn load_collections(&self, base_uri: &str) -> Result<Vec<SkosCollection>, LoaderError> {
    let mut seen_ids = std::collections::HashSet::<String>::new();
    let mut collections = Vec::new();

    for file in self.find_collection_files()? {
        let content = fs::read_to_string(&file)?;
        let parsed: Vec<SkosCollection> = match file.extension().and_then(|e| e.to_str()) {
            Some("xml") => match parse_skos_to_collections(&content, base_uri) {
                Ok(list) => list,
                Err(e) => {
                    eprintln!(
                        "Warning: Failed to parse XML collection {}: {}",
                        file.display(),
                        e
                    );
                    continue;
                }
            },
            Some("json") => {
                let as_single =
                    serde_json::from_str::<SkosCollection>(&content).map(|one| vec![one]);
                match as_single.or_else(|_| serde_json::from_str::<Vec<SkosCollection>>(&content))
                {
                    Ok(list) => list,
                    Err(_) => {
                        eprintln!(
                            "Warning: Failed to parse JSON collection {}: not a valid SkosCollection",
                            file.display(),
                        );
                        continue;
                    }
                }
            }
            _ => continue,
        };

        // `insert` returns false for ids already recorded, filtering duplicates.
        collections.extend(
            parsed
                .into_iter()
                .filter(|coll| seen_ids.insert(coll.id.clone())),
        );
    }
    Ok(collections)
}
/// Lists the subdirectories of `<root>/ontologies` that contain an
/// `ontology_config.json` file. Returns an empty list when `ontologies` is
/// missing.
///
/// # Errors
/// Propagates I/O errors from reading the `ontologies` directory.
pub fn find_ontology_dirs(&self) -> Result<Vec<PathBuf>, LoaderError> {
    let ontologies_dir = self.root_path.join("ontologies");
    let mut dirs = Vec::new();
    if ontologies_dir.exists() {
        for entry in fs::read_dir(&ontologies_dir)? {
            let candidate = entry?.path();
            if candidate.is_dir() && candidate.join("ontology_config.json").exists() {
                dirs.push(candidate);
            }
        }
    }
    Ok(dirs)
}
/// Reads and parses `ontology_config.json` from `ontology_dir`.
///
/// # Errors
/// Returns `IoError` when the file cannot be read and `JsonError` when it
/// cannot be parsed.
pub fn load_ontology_config(&self, ontology_dir: &Path) -> Result<OntologyConfig, LoaderError> {
    let raw = fs::read_to_string(ontology_dir.join("ontology_config.json"))?;
    let config: OntologyConfig = serde_json::from_str(&raw)?;
    Ok(config)
}
pub fn collect_ontology_xml_contents(
&self,
ontology_dir: &Path,
) -> Result<Vec<String>, LoaderError> {
let config = self.load_ontology_config(ontology_dir)?;
let mut xml_contents = Vec::new();
let base_path = ontology_dir.join(&config.base);
xml_contents.push(fs::read_to_string(&base_path).map_err(|e| {
LoaderError::IoError(std::io::Error::new(
e.kind(),
format!(
"Failed to read ontology base file {}: {}",
base_path.display(),
e
),
))
})?);
for ext in &config.extensions {
let ext_path = ontology_dir.join(ext);
xml_contents.push(fs::read_to_string(&ext_path).map_err(|e| {
LoaderError::IoError(std::io::Error::new(
e.kind(),
format!(
"Failed to read ontology extension {}: {}",
ext_path.display(),
e
),
))
})?);
}
Ok(xml_contents)
}
/// Builds an `OntologyValidator` from the base and extension XML of
/// `ontology_dir`.
///
/// # Errors
/// Fails when the XML files cannot be collected, or with `GraphError` when
/// `OntologyValidator::from_rdfs_xml` rejects them.
pub fn load_ontology_validator(
    &self,
    ontology_dir: &Path,
) -> Result<OntologyValidator, LoaderError> {
    let xml_contents = self.collect_ontology_xml_contents(ontology_dir)?;
    let refs: Vec<&str> = xml_contents.iter().map(String::as_str).collect();
    match OntologyValidator::from_rdfs_xml(&refs) {
        Ok(validator) => Ok(validator),
        Err(e) => Err(LoaderError::GraphError(e.to_string())),
    }
}
/// Recursively lists every `.json` file under `<root>/business_data`.
/// Returns an empty list when that directory is missing.
///
/// # Errors
/// Propagates I/O errors from walking the directory tree.
pub fn find_business_data_files(&self) -> Result<Vec<PathBuf>, LoaderError> {
    let mut files = Vec::new();
    let business_data_dir = self.root_path.join("business_data");
    if business_data_dir.exists() {
        self.collect_json_files(&business_data_dir, &mut files)?;
    }
    Ok(files)
}
/// Appends every `.json` file under `dir` to `files`, descending into
/// subdirectories as they are encountered.
///
/// # Errors
/// Propagates the first I/O error hit while reading a directory.
// `self` is only needed for the recursive call, which clippy flags.
#[allow(clippy::only_used_in_recursion)]
fn collect_json_files(&self, dir: &Path, files: &mut Vec<PathBuf>) -> Result<(), LoaderError> {
    for entry in fs::read_dir(dir)? {
        let child = entry?.path();
        if child.is_dir() {
            self.collect_json_files(&child, files)?;
            continue;
        }
        let is_json = child.extension().map_or(false, |ext| ext == "json");
        if is_json {
            files.push(child);
        }
    }
    Ok(())
}
/// Parses one business-data file and returns summaries of the resources whose
/// `graph_id` equals the requested one, in file order.
///
/// # Errors
/// Returns `IoError` when the file cannot be read and `JsonError` when it
/// does not match the expected schema.
pub fn load_resource_summaries_from_file(
    &self,
    path: &Path,
    graph_id: &str,
) -> Result<Vec<StaticResourceSummary>, LoaderError> {
    let raw = fs::read_to_string(path)?;
    let parsed: BusinessDataFile = serde_json::from_str(&raw)?;

    let mut summaries = Vec::new();
    for resource in &parsed.business_data.resources {
        if resource.resourceinstance.graph_id == graph_id {
            summaries.push(resource.to_summary());
        }
    }
    Ok(summaries)
}
/// Returns one page of resource summaries for `graph_id`.
///
/// Every business-data file is read and filtered first; the page is then the
/// slice starting at `offset` with at most `limit` entries. Files that fail to
/// load are skipped with a warning on stderr.
///
/// # Returns
/// The page plus a flag that is `true` when more summaries exist beyond it.
///
/// # Errors
/// Only fails when the business-data directory tree cannot be listed.
pub fn load_resource_summaries(
    &self,
    graph_id: &str,
    offset: usize,
    limit: usize,
) -> Result<(Vec<StaticResourceSummary>, bool), LoaderError> {
    let files = self.find_business_data_files()?;
    let mut all_summaries = Vec::new();
    for file in &files {
        match self.load_resource_summaries_from_file(file, graph_id) {
            Ok(summaries) => all_summaries.extend(summaries),
            Err(e) => {
                eprintln!(
                    "Warning: Failed to load resources from {}: {}",
                    file.display(),
                    e
                );
            }
        }
    }
    let total = all_summaries.len();
    // Saturate rather than overflow: callers may pass `usize::MAX` as "no
    // limit", and a page end beyond `usize::MAX` can never be below `total`.
    let has_more = offset.saturating_add(limit) < total;
    let summaries: Vec<_> = all_summaries.into_iter().skip(offset).take(limit).collect();
    Ok((summaries, has_more))
}
/// Counts the resources belonging to `graph_id` across all business-data files.
///
/// # Errors
/// Unlike `load_resource_summaries`, this stops at the first file that cannot
/// be read or parsed and returns that error.
pub fn count_resources_for_graph(&self, graph_id: &str) -> Result<usize, LoaderError> {
    self.find_business_data_files()?
        .iter()
        .map(|file| self.fast_count_resources_in_file(file, graph_id))
        .sum()
}
/// Counts the resources in one file whose `graph_id` equals the requested one.
///
/// The file is parsed with a reduced schema that keeps only each resource's
/// `graph_id`.
///
/// # Errors
/// Returns `IoError` when the file cannot be read and `JsonError` when it
/// does not match the expected schema.
pub fn fast_count_resources_in_file(
    &self,
    path: &Path,
    graph_id: &str,
) -> Result<usize, LoaderError> {
    let raw = fs::read_to_string(path)?;
    let parsed: BusinessDataFileCount = serde_json::from_str(&raw)?;

    let mut matching = 0usize;
    for resource in &parsed.business_data.resources {
        if resource.resourceinstance.graph_id == graph_id {
            matching += 1;
        }
    }
    Ok(matching)
}
/// Returns each business-data file that contains at least one resource of
/// `graph_id`, paired with how many it contains.
///
/// # Errors
/// Stops at the first file that cannot be read or parsed and returns that
/// error.
pub fn get_business_data_file_counts(
    &self,
    graph_id: &str,
) -> Result<Vec<(PathBuf, usize)>, LoaderError> {
    self.find_business_data_files()?
        .into_iter()
        .filter_map(
            |file| match self.fast_count_resources_in_file(&file, graph_id) {
                // Files without a matching resource are left out entirely.
                Ok(0) => None,
                Ok(count) => Some(Ok((file, count))),
                Err(e) => Some(Err(e)),
            },
        )
        .collect()
}
/// Parses one business-data file and returns the full resources whose
/// `graph_id` equals the requested one, in file order.
///
/// # Errors
/// Returns `IoError` when the file cannot be read and `JsonError` when it
/// does not match the expected schema.
pub fn load_full_resources_from_file(
    &self,
    path: &Path,
    graph_id: &str,
) -> Result<Vec<StaticResource>, LoaderError> {
    let raw = fs::read_to_string(path)?;
    let parsed: BusinessDataFileFull = serde_json::from_str(&raw)?;

    let mut matching = Vec::new();
    for resource in &parsed.business_data.resources {
        if resource.resourceinstance.graph_id == graph_id {
            matching.push(resource.to_static_resource());
        }
    }
    Ok(matching)
}
pub fn load_all_full_resources_from_file(
&self,
path: &Path,
) -> Result<Vec<StaticResource>, LoaderError> {
let content = fs::read_to_string(path)?;
if let Ok(file_data) = serde_json::from_str::<BusinessDataFileFull>(&content) {
let resources: Vec<StaticResource> = file_data
.business_data
.resources
.into_iter()
.map(|r| r.to_static_resource())
.collect();
Ok(resources)
} else {
let resource: BusinessDataResourceFull = serde_json::from_str(&content)?;
Ok(vec![resource.to_static_resource()])
}
}
/// Scans the business-data files for the resource with id `resource_id` and
/// returns the first match.
///
/// NOTE(review): `graph_id` is only used in the not-found message; matching
/// is done on the resource id alone. Confirm whether filtering by graph is
/// intended.
///
/// # Errors
/// Returns `NotFound` when no file contains the resource. Also fails with the
/// read or parse error of the first unreadable file encountered before a
/// match is found.
pub fn load_full_resource(
    &self,
    resource_id: &str,
    graph_id: &str,
) -> Result<StaticResource, LoaderError> {
    for file in self.find_business_data_files()? {
        let content = fs::read_to_string(&file)?;
        let parsed: BusinessDataFileFull = serde_json::from_str(&content)?;
        let hit = parsed
            .business_data
            .resources
            .iter()
            .find(|candidate| candidate.resourceinstance.resourceinstanceid == resource_id);
        if let Some(resource) = hit {
            return Ok(resource.to_static_resource());
        }
    }
    Err(LoaderError::NotFound(format!(
        "Resource {} not found in graph {}",
        resource_id, graph_id
    )))
}
/// Loads summaries for `graph_id` from the given files in parallel and sends
/// each file's non-empty batch through `tx`.
///
/// The `usize` paired with each path is not used here. Files that fail to
/// load are skipped with a warning on stderr, matching the sequential
/// loader. A failed `send` (receiver gone) is ignored, and the batch still
/// counts towards the total.
///
/// # Returns
/// The number of summaries loaded. This function currently never returns
/// `Err`.
#[cfg(feature = "parallel")]
pub fn load_resources_parallel(
    &self,
    files: &[(PathBuf, usize)],
    graph_id: &str,
    tx: &Sender<Vec<StaticResourceSummary>>,
) -> Result<usize, LoaderError> {
    use std::sync::atomic::{AtomicUsize, Ordering};

    // Only a running total is shared between workers, so relaxed ordering is
    // sufficient; the final load happens after all workers have finished.
    let total_loaded = AtomicUsize::new(0);
    files.par_iter().for_each(|(file_path, _count)| {
        match self.load_resource_summaries_from_file(file_path, graph_id) {
            Ok(summaries) if !summaries.is_empty() => {
                total_loaded.fetch_add(summaries.len(), Ordering::Relaxed);
                let _ = tx.send(summaries);
            }
            Ok(_) => {}
            Err(e) => {
                eprintln!(
                    "Warning: Failed to load resources from {}: {}",
                    file_path.display(),
                    e
                );
            }
        }
    });
    Ok(total_loaded.load(Ordering::Relaxed))
}
/// Sequential fallback used when the `parallel` feature is disabled: loads
/// summaries for `graph_id` from the given files one by one and sends each
/// file's non-empty batch through `tx`.
///
/// The `usize` paired with each path is not used here. Files that fail to
/// load are skipped with a warning on stderr, matching the paginated loader.
/// A failed `send` (receiver gone) is ignored, and the batch still counts
/// towards the total.
///
/// # Returns
/// The number of summaries loaded. This function currently never returns
/// `Err`.
#[cfg(not(feature = "parallel"))]
pub fn load_resources_parallel(
    &self,
    files: &[(PathBuf, usize)],
    graph_id: &str,
    tx: &Sender<Vec<StaticResourceSummary>>,
) -> Result<usize, LoaderError> {
    let mut total_loaded = 0;
    for (file_path, _count) in files {
        match self.load_resource_summaries_from_file(file_path, graph_id) {
            Ok(summaries) if !summaries.is_empty() => {
                total_loaded += summaries.len();
                let _ = tx.send(summaries);
            }
            Ok(_) => {}
            Err(e) => {
                eprintln!(
                    "Warning: Failed to load resources from {}: {}",
                    file_path.display(),
                    e
                );
            }
        }
    }
    Ok(total_loaded)
}
/// Counts, in parallel, how many resources of `graph_id` each file contains
/// and returns the files with a non-zero count.
///
/// Files that cannot be read or parsed are left out of the result and
/// reported with a warning on stderr, so that a missing file is not mistaken
/// for one without matching resources.
#[cfg(feature = "parallel")]
pub fn count_resources_parallel(
    &self,
    files: &[PathBuf],
    graph_id: &str,
) -> Vec<(PathBuf, usize)> {
    files
        .par_iter()
        .filter_map(
            |file| match self.fast_count_resources_in_file(file, graph_id) {
                Ok(0) => None,
                Ok(count) => Some((file.clone(), count)),
                Err(e) => {
                    eprintln!(
                        "Warning: Failed to count resources in {}: {}",
                        file.display(),
                        e
                    );
                    None
                }
            },
        )
        .collect()
}
/// Sequential fallback used when the `parallel` feature is disabled: counts
/// how many resources of `graph_id` each file contains and returns the files
/// with a non-zero count.
///
/// Files that cannot be read or parsed are left out of the result and
/// reported with a warning on stderr, so that a missing file is not mistaken
/// for one without matching resources.
#[cfg(not(feature = "parallel"))]
pub fn count_resources_parallel(
    &self,
    files: &[PathBuf],
    graph_id: &str,
) -> Vec<(PathBuf, usize)> {
    files
        .iter()
        .filter_map(
            |file| match self.fast_count_resources_in_file(file, graph_id) {
                Ok(0) => None,
                Ok(count) => Some((file.clone(), count)),
                Err(e) => {
                    eprintln!(
                        "Warning: Failed to count resources in {}: {}",
                        file.display(),
                        e
                    );
                    None
                }
            },
        )
        .collect()
}
/// Recursively lists every `.pi` file under `<root>/preindex`. Returns an
/// empty list when that directory is missing.
///
/// `_graph_id` is currently unused: the listing is not narrowed by graph.
///
/// # Errors
/// Propagates I/O errors from walking the directory tree.
pub fn find_preindex_files(&self, _graph_id: &str) -> Result<Vec<PathBuf>, LoaderError> {
    let mut files = Vec::new();
    let preindex_dir = self.root_path.join("preindex");
    if preindex_dir.exists() {
        self.collect_pi_files(&preindex_dir, &mut files)?;
    }
    Ok(files)
}
/// Appends every `.pi` file under `dir` to `files`, descending into
/// subdirectories as they are encountered.
///
/// # Errors
/// Propagates the first I/O error hit while reading a directory.
// `self` is only needed for the recursive call, which clippy flags.
#[allow(clippy::only_used_in_recursion)]
fn collect_pi_files(&self, dir: &Path, files: &mut Vec<PathBuf>) -> Result<(), LoaderError> {
    for entry in fs::read_dir(dir)? {
        let child = entry?.path();
        if child.is_dir() {
            self.collect_pi_files(&child, files)?;
            continue;
        }
        let is_preindex = child.extension().map_or(false, |ext| ext == "pi");
        if is_preindex {
            files.push(child);
        }
    }
    Ok(())
}
/// Returns one page of preindexed summaries for `graph_id`.
///
/// Every preindex file is read and filtered first; the page is then the slice
/// starting at `offset` with at most `limit` entries. Files that fail to load
/// are skipped with a warning on stderr.
///
/// # Returns
/// The page plus a flag that is `true` when more summaries exist beyond it.
///
/// # Errors
/// Only fails when the preindex directory tree cannot be listed.
pub fn load_preindex_summaries(
    &self,
    graph_id: &str,
    offset: usize,
    limit: usize,
) -> Result<(Vec<StaticResourceSummary>, bool), LoaderError> {
    let files = self.find_preindex_files(graph_id)?;
    let mut all_summaries = Vec::new();
    for file in &files {
        match self.load_preindex_file(file, graph_id) {
            Ok(summaries) => all_summaries.extend(summaries),
            Err(e) => {
                eprintln!(
                    "Warning: Failed to load preindex from {}: {}",
                    file.display(),
                    e
                );
            }
        }
    }
    let total = all_summaries.len();
    // Saturate rather than overflow: callers may pass `usize::MAX` as "no
    // limit", and a page end beyond `usize::MAX` can never be below `total`.
    let has_more = offset.saturating_add(limit) < total;
    let summaries: Vec<_> = all_summaries.into_iter().skip(offset).take(limit).collect();
    Ok((summaries, has_more))
}
fn load_preindex_file(
&self,
path: &Path,
graph_id: &str,
) -> Result<Vec<StaticResourceSummary>, LoaderError> {
let content = fs::read_to_string(path)?;
let mut summaries = Vec::new();
if let Ok(array) = serde_json::from_str::<Vec<StaticResourceSummary>>(&content) {
for summary in array {
if summary.graph_id == graph_id {
summaries.push(summary);
}
}
return Ok(summaries);
}
for line in content.lines() {
let line = line.trim();
if line.is_empty() {
continue;
}
if let Ok(summary) = serde_json::from_str::<StaticResourceSummary>(line) {
if summary.graph_id == graph_id {
summaries.push(summary);
}
}
}
Ok(summaries)
}
/// Counts the preindexed summaries belonging to `graph_id` across all
/// preindex files. Files that cannot be read contribute nothing and are not
/// reported.
///
/// # Errors
/// Only fails when the preindex directory tree cannot be listed.
pub fn count_preindex_resources_for_graph(&self, graph_id: &str) -> Result<usize, LoaderError> {
    let total: usize = self
        .find_preindex_files(graph_id)?
        .iter()
        .filter_map(|file| self.load_preindex_file(file, graph_id).ok())
        .map(|summaries| summaries.len())
        .sum();
    Ok(total)
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::graph::StaticGraph;
    use std::path::PathBuf;

    /// A loader cannot be created for a path that does not exist.
    #[test]
    fn test_loader_not_found() {
        let outcome = PrebuildLoader::new("/nonexistent/path");
        assert!(matches!(outcome, Err(LoaderError::NotFound(_))));
    }

    /// A graph without `source_identifier_id` / `is_active` still parses, and
    /// those fields come back as `None`.
    #[test]
    fn test_parse_coral_format_json() {
        // The fixture lives two directory levels above this crate's manifest.
        let manifest_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
        let two_levels_up = manifest_dir.parent().unwrap().parent().unwrap();
        let test_path = two_levels_up.join("tests/data/models/Person.json");

        let content = std::fs::read_to_string(&test_path).expect("Failed to read test JSON file");
        let data: serde_json::Value = serde_json::from_str(&content).expect("Failed to parse JSON");
        let graph_json = &data["graph"][0];

        // Precondition: the fixture really lacks a source identifier.
        assert!(
            graph_json.get("source_identifier_id").is_none()
                || graph_json["source_identifier_id"].is_null()
        );

        let graph: StaticGraph = serde_json::from_value(graph_json.clone())
            .expect("Failed to parse StaticGraph from Coral format");
        assert!(!graph.graphid.is_empty());
        assert!(graph.source_identifier_id.is_none());
        assert!(graph.is_active.is_none());
        assert!(!graph.nodes.is_empty());
    }

    /// A graph carrying the additional `source_identifier_id`, `is_active`
    /// and related fields parses and exposes their values.
    #[test]
    fn test_parse_arches_her_format_json() {
        let json = r#"{
"graphid": "test-graph-id",
"name": {"en": "Test Graph"},
"nodes": [],
"edges": [],
"nodegroups": [],
"cards": [],
"cards_x_nodes_x_widgets": [],
"functions_x_graphs": [],
"root": {
"nodeid": "root-node-id",
"name": "Root Node",
"datatype": "semantic",
"graph_id": "test-graph-id"
},
"source_identifier_id": "some-source-id",
"is_active": true,
"has_unpublished_changes": false,
"is_copy_immutable": false
}"#;
        let graph: StaticGraph =
            serde_json::from_str(json).expect("Failed to parse StaticGraph with Arches-HER fields");

        assert_eq!(graph.graphid, "test-graph-id");
        assert_eq!(
            graph.source_identifier_id,
            Some("some-source-id".to_string())
        );
        assert_eq!(graph.is_active, Some(true));
        assert_eq!(graph.has_unpublished_changes, Some(false));
    }
}
pub fn parse_business_data_bytes(bytes: &[u8]) -> Result<Vec<StaticResource>, LoaderError> {
if let Ok(file_data) = serde_json::from_slice::<BusinessDataFileFull>(bytes) {
Ok(file_data
.business_data
.resources
.into_iter()
.map(|r| r.to_static_resource())
.collect())
} else {
let resource: BusinessDataResourceFull = serde_json::from_slice(bytes)?;
Ok(vec![resource.to_static_resource()])
}
}
/// Everything `import_prebuild` loaded and registered.
pub struct ImportPrebuildResult {
/// `graphid` of every graph that was loaded and registered.
pub graph_ids: Vec<String>,
/// Ids returned when adding collections to the global RDM cache, covering
/// both the prebuild's own collections and those from extra directories.
pub collection_ids: Vec<String>,
/// Collections loaded from the prebuild's own `reference_data`; collections
/// from extra directories are not included here.
pub collections: Vec<SkosCollection>,
/// Validators built from the ontology XML; empty when no XML was found,
/// otherwise a single validator covering all of it.
pub ontology_validators: Vec<OntologyValidator>,
/// One config per ontology directory found in the prebuild.
pub ontology_configs: Vec<OntologyConfig>,
}
/// Parses every `.xml` and `.json` collection file directly inside `dir`.
///
/// Files are processed in path order. XML files go through
/// `parse_skos_to_collections`; JSON files are tried as a single
/// `SkosCollection` and then as an array of them. Files that fail to parse
/// are skipped with a warning on stderr. A collection whose `id` was already
/// seen in an earlier file is dropped. Returns an empty list when `dir` is
/// not a directory.
///
/// # Errors
/// Propagates I/O errors from listing the directory or reading a file.
pub fn load_collections_from_dir(
    dir: &str,
    base_uri: &str,
) -> Result<Vec<SkosCollection>, LoaderError> {
    let dir_path = Path::new(dir);
    let mut collections = Vec::new();
    if !dir_path.is_dir() {
        return Ok(collections);
    }

    let mut files: Vec<PathBuf> = Vec::new();
    for entry in fs::read_dir(dir_path)? {
        let candidate = entry?.path();
        let wanted = matches!(
            candidate.extension().and_then(|e| e.to_str()),
            Some("xml") | Some("json")
        );
        if wanted {
            files.push(candidate);
        }
    }
    files.sort();

    let mut seen_ids = std::collections::HashSet::<String>::new();
    for file in &files {
        let content = fs::read_to_string(file)?;
        let parsed: Vec<SkosCollection> = match file.extension().and_then(|e| e.to_str()) {
            Some("xml") => match parse_skos_to_collections(&content, base_uri) {
                Ok(list) => list,
                Err(e) => {
                    eprintln!(
                        "Warning: Failed to parse XML collection {}: {}",
                        file.display(),
                        e
                    );
                    continue;
                }
            },
            Some("json") => {
                let as_single =
                    serde_json::from_str::<SkosCollection>(&content).map(|one| vec![one]);
                match as_single.or_else(|_| serde_json::from_str::<Vec<SkosCollection>>(&content))
                {
                    Ok(list) => list,
                    Err(_) => {
                        eprintln!(
                            "Warning: Failed to parse JSON collection {}: not a valid SkosCollection",
                            file.display(),
                        );
                        continue;
                    }
                }
            }
            _ => continue,
        };

        // `insert` returns false for ids already recorded, filtering duplicates.
        collections.extend(
            parsed
                .into_iter()
                .filter(|coll| seen_ids.insert(coll.id.clone())),
        );
    }
    Ok(collections)
}
/// Reads every `.xml` file directly inside `dir` and returns their contents
/// in path order. Returns an empty list when `dir` is not a directory.
///
/// # Errors
/// Propagates I/O errors from listing the directory, and returns an `IoError`
/// naming the offending file when a file cannot be read.
pub fn load_ontology_xml_from_dir(dir: &str) -> Result<Vec<String>, LoaderError> {
    let dir_path = Path::new(dir);
    if !dir_path.is_dir() {
        return Ok(Vec::new());
    }

    let mut xml_files: Vec<PathBuf> = Vec::new();
    for entry in fs::read_dir(dir_path)? {
        let candidate = entry?.path();
        if matches!(
            candidate.extension().and_then(|e| e.to_str()),
            Some("xml")
        ) {
            xml_files.push(candidate);
        }
    }
    xml_files.sort();

    xml_files
        .iter()
        .map(|file| {
            fs::read_to_string(file).map_err(|e| {
                // Re-wrap with the same kind so the path shows up in the message.
                LoaderError::IoError(std::io::Error::new(
                    e.kind(),
                    format!("Failed to read ontology file {}: {}", file.display(), e),
                ))
            })
        })
        .collect()
}
/// Loads a prebuild directory and registers its contents with the crate-level
/// registries.
///
/// Steps, in order:
/// 1. Set the RDM namespace from `base_uri`.
/// 2. Load every graph under `path` and hand each one to
///    `crate::register_graph_owned`.
/// 3. Load the prebuild's collections and pass them to
///    `crate::add_to_global_rdm_cache_from_skos`; do the same for every
///    directory in `extra_reference_data_dirs`.
/// 4. Load the config and XML of every ontology directory, append the XML
///    found in `extra_ontology_dirs`, and build one validator from all of it.
///
/// Note that the returned `collections` only holds the prebuild's own
/// collections; extra directories contribute to `collection_ids` only.
///
/// # Errors
/// Returns `Other` when the namespace cannot be set, `NotFound` when `path`
/// does not exist, `GraphError` when the validator cannot be built, and
/// I/O or JSON errors from the individual loading steps. Registrations made
/// before a failure are not undone here.
pub fn import_prebuild(
path: &str,
base_uri: &str,
extra_reference_data_dirs: Option<&[&str]>,
extra_ontology_dirs: Option<&[&str]>,
) -> Result<ImportPrebuildResult, LoaderError> {
// The namespace is set before any collection is parsed with `base_uri`.
crate::set_rdm_namespace(base_uri)
.map_err(|e| LoaderError::Other(format!("Failed to set RDM namespace: {}", e)))?;
let loader = PrebuildLoader::new(path)?;
let graphs = loader.load_all_graphs()?;
// Capture each id before the graph is moved into the registry.
let graph_ids: Vec<String> = graphs
.into_iter()
.map(|g| {
let id = g.graphid.clone();
crate::register_graph_owned(g);
id
})
.collect();
let collections = loader.load_collections(base_uri)?;
let mut collection_ids = crate::add_to_global_rdm_cache_from_skos(&collections);
// Extra reference-data directories only contribute ids, not collections.
if let Some(dirs) = extra_reference_data_dirs {
for dir in dirs {
let extra_collections = load_collections_from_dir(dir, base_uri)?;
let extra_ids = crate::add_to_global_rdm_cache_from_skos(&extra_collections);
collection_ids.extend(extra_ids);
}
}
let ontology_dirs = loader.find_ontology_dirs()?;
let mut all_xml_contents = Vec::new();
let mut ontology_configs = Vec::new();
for dir in &ontology_dirs {
ontology_configs.push(loader.load_ontology_config(dir)?);
all_xml_contents.extend(loader.collect_ontology_xml_contents(dir)?);
}
if let Some(extra_dirs) = extra_ontology_dirs {
for dir in extra_dirs {
all_xml_contents.extend(load_ontology_xml_from_dir(dir)?);
}
}
// A single validator is built from the combined XML; none when there is no XML.
let mut ontology_validators = Vec::new();
if !all_xml_contents.is_empty() {
let refs: Vec<&str> = all_xml_contents.iter().map(|s| s.as_str()).collect();
let validator = OntologyValidator::from_rdfs_xml(&refs)
.map_err(|e| LoaderError::GraphError(e.to_string()))?;
ontology_validators.push(validator);
}
Ok(ImportPrebuildResult {
graph_ids,
collection_ids,
collections,
ontology_validators,
ontology_configs,
})
}