1use crate::graph::{
7 IndexedGraph, StaticGraph, StaticResource, StaticResourceDescriptors, StaticResourceMetadata,
8 StaticResourceSummary, StaticTile,
9};
10use crate::ontology::{OntologyConfig, OntologyValidator};
11use crate::skos::{parse_skos_to_collections, SkosCollection};
12#[cfg(feature = "parallel")]
13use rayon::prelude::*;
14use serde::Deserialize;
15use std::collections::HashMap;
16use std::fs;
17use std::path::{Path, PathBuf};
18use std::sync::mpsc::Sender;
19
/// Top-level shape of a business-data JSON file, as read when building summaries.
///
/// Only the `business_data` key is modelled.
#[derive(Debug, Deserialize)]
struct BusinessDataFile {
    /// The `business_data` object holding the resource list.
    business_data: BusinessDataContent,
}
29
/// Body of the `business_data` object for summary loading.
#[derive(Debug, Deserialize)]
struct BusinessDataContent {
    /// Resources listed in the file; a missing `resources` key yields an empty list.
    #[serde(default)]
    resources: Vec<BusinessDataResource>,
}
36
/// One resource entry, reduced to the fields needed for a summary.
#[derive(Debug, Deserialize)]
struct BusinessDataResource {
    /// Identity and bookkeeping fields of the resource.
    resourceinstance: BusinessDataResourceInstance,
    /// Optional string-to-string metadata; absent when the key is missing.
    #[serde(default)]
    metadata: Option<HashMap<String, String>>,
}
44
/// The `resourceinstance` object of a resource, as read for summaries.
///
/// `resourceinstanceid`, `graph_id` and `name` are required; every other
/// field is optional and becomes `None` when the key is missing.
#[derive(Debug, Deserialize)]
struct BusinessDataResourceInstance {
    /// Identifier of the resource instance.
    resourceinstanceid: String,
    /// Identifier of the graph this resource belongs to (used for filtering).
    graph_id: String,
    /// Name of the resource.
    name: String,
    /// Descriptors, accepted in whichever form `FlexibleDescriptors` can parse.
    #[serde(default)]
    descriptors: Option<FlexibleDescriptors>,
    /// Creation time, kept as the raw string from the file.
    #[serde(default)]
    createdtime: Option<String>,
    /// Last-modified time, kept as the raw string from the file.
    #[serde(default)]
    lastmodified: Option<String>,
    /// Publication identifier, if present.
    #[serde(default)]
    publication_id: Option<String>,
    /// Principal user identifier, if present.
    #[serde(default)]
    principaluser_id: Option<i32>,
    /// Legacy identifier, if present.
    #[serde(default)]
    legacyid: Option<String>,
    /// Graph publication identifier, if present.
    #[serde(default)]
    graph_publication_id: Option<String>,
}
66
/// Descriptors read from JSON through a hand-written `Deserialize` impl.
///
/// Whatever shape the input had, only a single resolved descriptor set is kept.
#[derive(Debug)]
struct FlexibleDescriptors {
    /// The descriptor set selected during deserialization (may be empty).
    resolved: StaticResourceDescriptors,
}
74
75impl FlexibleDescriptors {
76 fn get_for_lang(&self, _lang: &str) -> Option<StaticResourceDescriptors> {
77 if self.resolved.is_empty() {
78 None
79 } else {
80 Some(self.resolved.clone())
81 }
82 }
83}
84
85impl<'de> Deserialize<'de> for FlexibleDescriptors {
86 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
87 where
88 D: serde::Deserializer<'de>,
89 {
90 let value = serde_json::Value::deserialize(deserializer)?;
91
92 if let Ok(flat) = serde_json::from_value::<StaticResourceDescriptors>(value.clone()) {
94 if !flat.is_empty() {
95 return Ok(FlexibleDescriptors { resolved: flat });
96 }
97 }
98
99 if let Ok(nested) =
101 serde_json::from_value::<HashMap<String, StaticResourceDescriptors>>(value)
102 {
103 let resolved = nested
104 .get("en")
105 .or_else(|| nested.values().next())
106 .cloned()
107 .unwrap_or_default();
108 return Ok(FlexibleDescriptors { resolved });
109 }
110
111 Ok(FlexibleDescriptors {
112 resolved: StaticResourceDescriptors::default(),
113 })
114 }
115}
116
117impl BusinessDataResource {
118 fn to_summary(&self) -> StaticResourceSummary {
120 let ri = &self.resourceinstance;
121 StaticResourceSummary {
122 resourceinstanceid: ri.resourceinstanceid.clone(),
123 graph_id: ri.graph_id.clone(),
124 name: ri.name.clone(),
125 descriptors: ri.descriptors.as_ref().and_then(|d| d.get_for_lang("en")),
126 metadata: self.metadata.clone().unwrap_or_default(),
127 createdtime: ri.createdtime.clone(),
128 lastmodified: ri.lastmodified.clone(),
129 publication_id: ri.publication_id.clone(),
130 principaluser_id: ri.principaluser_id,
131 legacyid: ri.legacyid.clone(),
132 graph_publication_id: ri.graph_publication_id.clone(),
133 }
134 }
135}
136
/// Minimal view of a business-data file used only for counting resources.
#[derive(Debug, Deserialize)]
struct BusinessDataFileCount {
    /// The `business_data` object holding the resource list.
    business_data: BusinessDataContentCount,
}
146
/// Body of the `business_data` object in the counting view.
#[derive(Debug, Deserialize)]
struct BusinessDataContentCount {
    /// Resources listed in the file; a missing `resources` key yields an empty list.
    #[serde(default)]
    resources: Vec<BusinessDataResourceCount>,
}
153
/// One resource entry in the counting view; only `resourceinstance` is read.
#[derive(Debug, Deserialize)]
struct BusinessDataResourceCount {
    /// The part of the resource instance needed for counting.
    resourceinstance: BusinessDataResourceInstanceCount,
}
159
/// Resource instance in the counting view; only the graph id is declared.
#[derive(Debug, Deserialize)]
struct BusinessDataResourceInstanceCount {
    /// Identifier of the graph this resource belongs to.
    graph_id: String,
}
164
/// Top-level shape of a business-data JSON file when loading full resources.
#[derive(Debug, Deserialize)]
struct BusinessDataFileFull {
    /// The `business_data` object holding the resource list.
    business_data: BusinessDataContentFull,
}
174
/// Body of the `business_data` object when loading full resources.
#[derive(Debug, Deserialize)]
struct BusinessDataContentFull {
    /// Resources listed in the file; a missing `resources` key yields an empty list.
    #[serde(default)]
    resources: Vec<BusinessDataResourceFull>,
}
180
/// One complete resource entry, including tiles and auxiliary JSON blobs.
#[derive(Debug, Deserialize)]
struct BusinessDataResourceFull {
    /// Identity and bookkeeping fields of the resource.
    resourceinstance: BusinessDataResourceInstanceFull,
    /// Tiles of the resource; `None` when the key is missing.
    #[serde(default)]
    tiles: Option<Vec<StaticTile>>,
    /// Optional string-to-string metadata; absent when the key is missing.
    #[serde(default)]
    metadata: Option<HashMap<String, String>>,
    /// Raw JSON stored under the `__cache` key, kept uninterpreted.
    #[serde(default, rename = "__cache")]
    cache: Option<serde_json::Value>,
    /// Raw JSON stored under the `__scopes` key, kept uninterpreted.
    #[serde(default, rename = "__scopes")]
    scopes: Option<serde_json::Value>,
}
194
/// The `resourceinstance` object of a resource, as read for full loading.
///
/// `resourceinstanceid`, `graph_id` and `name` are required; every other
/// field is optional and becomes `None` when the key is missing.
#[derive(Debug, Deserialize)]
struct BusinessDataResourceInstanceFull {
    /// Identifier of the resource instance.
    resourceinstanceid: String,
    /// Identifier of the graph this resource belongs to (used for filtering).
    graph_id: String,
    /// Name of the resource.
    name: String,
    /// Descriptors, accepted in whichever form `FlexibleDescriptors` can parse.
    #[serde(default)]
    descriptors: Option<FlexibleDescriptors>,
    /// Creation time, kept as the raw string from the file.
    #[serde(default)]
    createdtime: Option<String>,
    /// Last-modified time, kept as the raw string from the file.
    #[serde(default)]
    lastmodified: Option<String>,
    /// Publication identifier, if present.
    #[serde(default)]
    publication_id: Option<String>,
    /// Principal user identifier, if present.
    #[serde(default)]
    principaluser_id: Option<i32>,
    /// Legacy identifier, if present.
    #[serde(default)]
    legacyid: Option<String>,
    /// Graph publication identifier, if present.
    #[serde(default)]
    graph_publication_id: Option<String>,
}
216
217impl BusinessDataResourceFull {
218 fn to_static_resource(&self) -> StaticResource {
220 let ri = &self.resourceinstance;
221 let descriptors = ri
222 .descriptors
223 .as_ref()
224 .and_then(|d| d.get_for_lang("en"))
225 .unwrap_or_default();
226
227 StaticResource {
228 resourceinstance: StaticResourceMetadata {
229 resourceinstanceid: ri.resourceinstanceid.clone(),
230 graph_id: ri.graph_id.clone(),
231 name: ri.name.clone(),
232 descriptors,
233 createdtime: ri.createdtime.clone(),
234 lastmodified: ri.lastmodified.clone(),
235 publication_id: ri.publication_id.clone(),
236 principaluser_id: ri.principaluser_id,
237 legacyid: ri.legacyid.clone(),
238 graph_publication_id: ri.graph_publication_id.clone(),
239 },
240 tiles: self.tiles.clone(),
241 metadata: self.metadata.clone().unwrap_or_default(),
242 cache: self.cache.clone(),
243 scopes: self.scopes.clone(),
244 tiles_loaded: Some(true),
245 }
246 }
247}
248
/// Errors produced while loading data from a prebuild directory.
#[derive(Debug)]
pub enum LoaderError {
    /// A filesystem operation failed.
    IoError(std::io::Error),
    /// JSON could not be parsed into the expected shape.
    JsonError(serde_json::Error),
    /// A graph (or ontology) could not be built; carries the message.
    GraphError(String),
    /// A requested directory or resource does not exist; carries the message.
    NotFound(String),
    /// Any other failure, described by its message.
    Other(String),
}
258
259impl std::fmt::Display for LoaderError {
260 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
261 match self {
262 LoaderError::IoError(e) => write!(f, "IO error: {}", e),
263 LoaderError::JsonError(e) => write!(f, "JSON error: {}", e),
264 LoaderError::GraphError(s) => write!(f, "Graph error: {}", s),
265 LoaderError::NotFound(s) => write!(f, "Not found: {}", s),
266 LoaderError::Other(s) => write!(f, "{}", s),
267 }
268 }
269}
270
// Marker impl: relies on the default `Error` methods, so no `source()` is exposed.
impl std::error::Error for LoaderError {}
272
273impl From<std::io::Error> for LoaderError {
274 fn from(e: std::io::Error) -> Self {
275 LoaderError::IoError(e)
276 }
277}
278
279impl From<serde_json::Error> for LoaderError {
280 fn from(e: serde_json::Error) -> Self {
281 LoaderError::JsonError(e)
282 }
283}
284
/// Snapshot of what a prebuild directory contains.
#[derive(Debug, Clone)]
pub struct PrebuildInfo {
    /// Root of the prebuild directory.
    pub path: PathBuf,
    /// `true` when at least one graph JSON file was found.
    pub has_graphs: bool,
    /// `true` when the `business_data` subdirectory exists.
    pub has_business_data: bool,
    /// `true` when the `reference_data` subdirectory exists.
    pub has_reference_data: bool,
    /// `true` when the `indexTemplates` subdirectory exists.
    pub has_index_templates: bool,
    /// `true` when the `ontologies` subdirectory exists.
    pub has_ontologies: bool,
    /// Paths of the graph JSON files that were found.
    pub graph_files: Vec<PathBuf>,
}
296
/// Loader for graphs, reference data, ontologies and business data stored
/// under a single prebuild root directory.
pub struct PrebuildLoader {
    /// Root of the prebuild directory; all subdirectories are resolved against it.
    root_path: PathBuf,
}
301
302impl PrebuildLoader {
303 pub fn new<P: AsRef<Path>>(path: P) -> Result<Self, LoaderError> {
305 let root_path = path.as_ref().to_path_buf();
306 if !root_path.exists() {
307 return Err(LoaderError::NotFound(format!(
308 "Prebuild directory not found: {}",
309 root_path.display()
310 )));
311 }
312 Ok(PrebuildLoader { root_path })
313 }
314
315 pub fn get_info(&self) -> Result<PrebuildInfo, LoaderError> {
317 let graphs_dir = self.root_path.join("graphs");
318 let business_data_dir = self.root_path.join("business_data");
319 let reference_data_dir = self.root_path.join("reference_data");
320 let index_templates_dir = self.root_path.join("indexTemplates");
321 let ontologies_dir = self.root_path.join("ontologies");
322
323 let graph_files = if graphs_dir.exists() {
324 self.find_graph_files(&graphs_dir)?
325 } else {
326 Vec::new()
327 };
328
329 Ok(PrebuildInfo {
330 path: self.root_path.clone(),
331 has_graphs: !graph_files.is_empty(),
332 has_business_data: business_data_dir.exists(),
333 has_reference_data: reference_data_dir.exists(),
334 has_index_templates: index_templates_dir.exists(),
335 has_ontologies: ontologies_dir.exists(),
336 graph_files,
337 })
338 }
339
340 fn find_graph_files(&self, graphs_dir: &Path) -> Result<Vec<PathBuf>, LoaderError> {
342 let mut files = Vec::new();
343
344 let resource_models = graphs_dir.join("resource_models");
346 if resource_models.exists() {
347 for entry in fs::read_dir(&resource_models)? {
348 let entry = entry?;
349 let path = entry.path();
350 if path.extension().map(|e| e == "json").unwrap_or(false) {
351 files.push(path);
352 }
353 }
354 }
355
356 let branches = graphs_dir.join("branches");
358 if branches.exists() {
359 for entry in fs::read_dir(&branches)? {
360 let entry = entry?;
361 let path = entry.path();
362 if path.extension().map(|e| e == "json").unwrap_or(false) {
363 files.push(path);
364 }
365 }
366 }
367
368 Ok(files)
369 }
370
371 pub fn load_graph<P: AsRef<Path>>(&self, path: P) -> Result<StaticGraph, LoaderError> {
373 let content = fs::read_to_string(path.as_ref())?;
374 StaticGraph::from_json_string(&content).map_err(LoaderError::GraphError)
375 }
376
377 pub fn load_indexed_graph<P: AsRef<Path>>(&self, path: P) -> Result<IndexedGraph, LoaderError> {
379 let graph = self.load_graph(path)?;
380 Ok(IndexedGraph::new(graph))
381 }
382
383 pub fn load_all_graphs(&self) -> Result<Vec<StaticGraph>, LoaderError> {
385 let info = self.get_info()?;
386 let mut graphs = Vec::new();
387
388 for path in &info.graph_files {
389 match self.load_graph(path) {
390 Ok(graph) => graphs.push(graph),
391 Err(e) => {
392 eprintln!("Warning: Failed to load graph {}: {}", path.display(), e);
393 }
394 }
395 }
396
397 Ok(graphs)
398 }
399
400 pub fn load_all_indexed_graphs(&self) -> Result<Vec<IndexedGraph>, LoaderError> {
402 let graphs = self.load_all_graphs()?;
403 Ok(graphs.into_iter().map(IndexedGraph::new).collect())
404 }
405
406 pub fn load_graphs_by_id(&self) -> Result<HashMap<String, IndexedGraph>, LoaderError> {
408 let graphs = self.load_all_indexed_graphs()?;
409 Ok(graphs
410 .into_iter()
411 .map(|g| (g.graph.graphid.clone(), g))
412 .collect())
413 }
414
415 pub fn get_subdir(&self, name: &str) -> PathBuf {
417 self.root_path.join(name)
418 }
419
420 pub fn root_path(&self) -> &Path {
422 &self.root_path
423 }
424
425 pub fn find_collection_files(&self) -> Result<Vec<PathBuf>, LoaderError> {
432 let reference_data = self.root_path.join("reference_data");
433 if !reference_data.exists() {
434 return Ok(Vec::new());
435 }
436
437 let mut files = Vec::new();
438 for subdir in &["concepts", "collections", "controlled_lists", "staging"] {
439 let dir = reference_data.join(subdir);
440 if !dir.is_dir() {
441 continue;
442 }
443 for entry in fs::read_dir(&dir)? {
444 let entry = entry?;
445 let path = entry.path();
446 let ext = path.extension().and_then(|e| e.to_str());
447 if ext == Some("xml") || ext == Some("json") {
448 files.push(path);
449 }
450 }
451 }
452 files.sort_by(|a, b| {
454 let ext_order = |p: &PathBuf| -> u8 {
455 match p.extension().and_then(|e| e.to_str()) {
456 Some("xml") => 0,
457 _ => 1,
458 }
459 };
460 ext_order(a).cmp(&ext_order(b)).then_with(|| a.cmp(b))
461 });
462 Ok(files)
463 }
464
465 pub fn load_collections(&self, base_uri: &str) -> Result<Vec<SkosCollection>, LoaderError> {
471 let files = self.find_collection_files()?;
472 let mut collections = Vec::new();
473 let mut seen_ids: std::collections::HashSet<String> = std::collections::HashSet::new();
474
475 for file in &files {
476 let content = fs::read_to_string(file)?;
477 let ext = file.extension().and_then(|e| e.to_str());
478
479 let parsed: Vec<SkosCollection> = match ext {
480 Some("xml") => match parse_skos_to_collections(&content, base_uri) {
481 Ok(p) => p,
482 Err(e) => {
483 eprintln!(
484 "Warning: Failed to parse XML collection {}: {}",
485 file.display(),
486 e
487 );
488 continue;
489 }
490 },
491 Some("json") => {
492 if let Ok(coll) = serde_json::from_str::<SkosCollection>(&content) {
494 vec![coll]
495 } else if let Ok(colls) = serde_json::from_str::<Vec<SkosCollection>>(&content)
496 {
497 colls
498 } else {
499 eprintln!(
500 "Warning: Failed to parse JSON collection {}: not a valid SkosCollection",
501 file.display(),
502 );
503 continue;
504 }
505 }
506 _ => continue,
507 };
508
509 for coll in parsed {
510 if seen_ids.insert(coll.id.clone()) {
511 collections.push(coll);
512 }
513 }
514 }
515
516 Ok(collections)
517 }
518
519 pub fn find_ontology_dirs(&self) -> Result<Vec<PathBuf>, LoaderError> {
525 let ontologies_dir = self.root_path.join("ontologies");
526 if !ontologies_dir.exists() {
527 return Ok(Vec::new());
528 }
529
530 let mut dirs = Vec::new();
531 for entry in fs::read_dir(&ontologies_dir)? {
532 let entry = entry?;
533 let path = entry.path();
534 if path.is_dir() && path.join("ontology_config.json").exists() {
535 dirs.push(path);
536 }
537 }
538 Ok(dirs)
539 }
540
541 pub fn load_ontology_config(&self, ontology_dir: &Path) -> Result<OntologyConfig, LoaderError> {
543 let config_path = ontology_dir.join("ontology_config.json");
544 let content = fs::read_to_string(&config_path)?;
545 serde_json::from_str(&content).map_err(LoaderError::from)
546 }
547
548 pub fn collect_ontology_xml_contents(
552 &self,
553 ontology_dir: &Path,
554 ) -> Result<Vec<String>, LoaderError> {
555 let config = self.load_ontology_config(ontology_dir)?;
556
557 let mut xml_contents = Vec::new();
558 let base_path = ontology_dir.join(&config.base);
559 xml_contents.push(fs::read_to_string(&base_path).map_err(|e| {
560 LoaderError::IoError(std::io::Error::new(
561 e.kind(),
562 format!(
563 "Failed to read ontology base file {}: {}",
564 base_path.display(),
565 e
566 ),
567 ))
568 })?);
569
570 for ext in &config.extensions {
571 let ext_path = ontology_dir.join(ext);
572 xml_contents.push(fs::read_to_string(&ext_path).map_err(|e| {
573 LoaderError::IoError(std::io::Error::new(
574 e.kind(),
575 format!(
576 "Failed to read ontology extension {}: {}",
577 ext_path.display(),
578 e
579 ),
580 ))
581 })?);
582 }
583
584 Ok(xml_contents)
585 }
586
587 pub fn load_ontology_validator(
590 &self,
591 ontology_dir: &Path,
592 ) -> Result<OntologyValidator, LoaderError> {
593 let xml_contents = self.collect_ontology_xml_contents(ontology_dir)?;
594 let refs: Vec<&str> = xml_contents.iter().map(|s| s.as_str()).collect();
595 OntologyValidator::from_rdfs_xml(&refs).map_err(|e| LoaderError::GraphError(e.to_string()))
596 }
597
598 pub fn find_business_data_files(&self) -> Result<Vec<PathBuf>, LoaderError> {
600 let business_data_dir = self.root_path.join("business_data");
601 if !business_data_dir.exists() {
602 return Ok(Vec::new());
603 }
604
605 let mut files = Vec::new();
606 self.collect_json_files(&business_data_dir, &mut files)?;
607 Ok(files)
608 }
609
610 #[allow(clippy::only_used_in_recursion)]
612 fn collect_json_files(&self, dir: &Path, files: &mut Vec<PathBuf>) -> Result<(), LoaderError> {
613 for entry in fs::read_dir(dir)? {
614 let entry = entry?;
615 let path = entry.path();
616 if path.is_dir() {
617 self.collect_json_files(&path, files)?;
618 } else if path.extension().map(|e| e == "json").unwrap_or(false) {
619 files.push(path);
620 }
621 }
622 Ok(())
623 }
624
625 pub fn load_resource_summaries_from_file(
628 &self,
629 path: &Path,
630 graph_id: &str,
631 ) -> Result<Vec<StaticResourceSummary>, LoaderError> {
632 let content = fs::read_to_string(path)?;
633 let file: BusinessDataFile = serde_json::from_str(&content)?;
634
635 let summaries: Vec<StaticResourceSummary> = file
636 .business_data
637 .resources
638 .into_iter()
639 .filter(|r| r.resourceinstance.graph_id == graph_id)
640 .map(|r| r.to_summary())
641 .collect();
642
643 Ok(summaries)
644 }
645
646 pub fn load_resource_summaries(
649 &self,
650 graph_id: &str,
651 offset: usize,
652 limit: usize,
653 ) -> Result<(Vec<StaticResourceSummary>, bool), LoaderError> {
654 let files = self.find_business_data_files()?;
655 let mut all_summaries = Vec::new();
656
657 for file in &files {
658 match self.load_resource_summaries_from_file(file, graph_id) {
659 Ok(summaries) => all_summaries.extend(summaries),
660 Err(e) => {
661 eprintln!(
662 "Warning: Failed to load resources from {}: {}",
663 file.display(),
664 e
665 );
666 }
667 }
668 }
669
670 let total = all_summaries.len();
672 let has_more = offset + limit < total;
673 let summaries: Vec<_> = all_summaries.into_iter().skip(offset).take(limit).collect();
674
675 Ok((summaries, has_more))
676 }
677
678 pub fn count_resources_for_graph(&self, graph_id: &str) -> Result<usize, LoaderError> {
680 let files = self.find_business_data_files()?;
681 let mut count = 0;
682
683 for file in &files {
684 count += self.fast_count_resources_in_file(file, graph_id)?;
685 }
686
687 Ok(count)
688 }
689
690 pub fn fast_count_resources_in_file(
692 &self,
693 path: &Path,
694 graph_id: &str,
695 ) -> Result<usize, LoaderError> {
696 let content = fs::read_to_string(path)?;
697 let file_data: BusinessDataFileCount = serde_json::from_str(&content)?;
698
699 let count = file_data
700 .business_data
701 .resources
702 .iter()
703 .filter(|r| r.resourceinstance.graph_id == graph_id)
704 .count();
705
706 Ok(count)
707 }
708
709 pub fn get_business_data_file_counts(
712 &self,
713 graph_id: &str,
714 ) -> Result<Vec<(PathBuf, usize)>, LoaderError> {
715 let files = self.find_business_data_files()?;
716 let mut result = Vec::with_capacity(files.len());
717
718 for file in files {
719 let count = self.fast_count_resources_in_file(&file, graph_id)?;
720 if count > 0 {
721 result.push((file, count));
722 }
723 }
724
725 Ok(result)
726 }
727
728 pub fn load_full_resources_from_file(
733 &self,
734 path: &Path,
735 graph_id: &str,
736 ) -> Result<Vec<StaticResource>, LoaderError> {
737 let content = fs::read_to_string(path)?;
738 let file_data: BusinessDataFileFull = serde_json::from_str(&content)?;
739
740 let resources: Vec<StaticResource> = file_data
741 .business_data
742 .resources
743 .into_iter()
744 .filter(|r| r.resourceinstance.graph_id == graph_id)
745 .map(|r| r.to_static_resource())
746 .collect();
747
748 Ok(resources)
749 }
750
751 pub fn load_all_full_resources_from_file(
758 &self,
759 path: &Path,
760 ) -> Result<Vec<StaticResource>, LoaderError> {
761 let content = fs::read_to_string(path)?;
762
763 if let Ok(file_data) = serde_json::from_str::<BusinessDataFileFull>(&content) {
765 let resources: Vec<StaticResource> = file_data
766 .business_data
767 .resources
768 .into_iter()
769 .map(|r| r.to_static_resource())
770 .collect();
771 Ok(resources)
772 } else {
773 let resource: BusinessDataResourceFull = serde_json::from_str(&content)?;
774 Ok(vec![resource.to_static_resource()])
775 }
776 }
777
778 pub fn load_full_resource(
781 &self,
782 resource_id: &str,
783 graph_id: &str,
784 ) -> Result<StaticResource, LoaderError> {
785 let files = self.find_business_data_files()?;
786
787 for file in &files {
788 let content = fs::read_to_string(file)?;
789 let file_data: BusinessDataFileFull = serde_json::from_str(&content)?;
790
791 for resource in file_data.business_data.resources {
792 if resource.resourceinstance.resourceinstanceid == resource_id {
793 return Ok(resource.to_static_resource());
794 }
795 }
796 }
797
798 Err(LoaderError::NotFound(format!(
799 "Resource {} not found in graph {}",
800 resource_id, graph_id
801 )))
802 }
803
804 #[cfg(feature = "parallel")]
814 pub fn load_resources_parallel(
815 &self,
816 files: &[(PathBuf, usize)],
817 graph_id: &str,
818 tx: &Sender<Vec<StaticResourceSummary>>,
819 ) -> Result<usize, LoaderError> {
820 use std::sync::atomic::{AtomicUsize, Ordering};
821
822 let total_loaded = AtomicUsize::new(0);
823 let graph_id = graph_id.to_string();
824
825 files.par_iter().for_each(|(file_path, _count)| {
827 if let Ok(summaries) = self.load_resource_summaries_from_file(file_path, &graph_id) {
828 if !summaries.is_empty() {
829 total_loaded.fetch_add(summaries.len(), Ordering::Relaxed);
830 let _ = tx.send(summaries);
831 }
832 }
833 });
834
835 Ok(total_loaded.load(Ordering::Relaxed))
836 }
837
838 #[cfg(not(feature = "parallel"))]
840 pub fn load_resources_parallel(
841 &self,
842 files: &[(PathBuf, usize)],
843 graph_id: &str,
844 tx: &Sender<Vec<StaticResourceSummary>>,
845 ) -> Result<usize, LoaderError> {
846 let mut total_loaded = 0;
847
848 for (file_path, _count) in files {
849 if let Ok(summaries) = self.load_resource_summaries_from_file(file_path, graph_id) {
850 if !summaries.is_empty() {
851 total_loaded += summaries.len();
852 let _ = tx.send(summaries);
853 }
854 }
855 }
856
857 Ok(total_loaded)
858 }
859
860 #[cfg(feature = "parallel")]
862 pub fn count_resources_parallel(
863 &self,
864 files: &[PathBuf],
865 graph_id: &str,
866 ) -> Vec<(PathBuf, usize)> {
867 files
868 .par_iter()
869 .filter_map(
870 |file| match self.fast_count_resources_in_file(file, graph_id) {
871 Ok(count) if count > 0 => Some((file.clone(), count)),
872 _ => None,
873 },
874 )
875 .collect()
876 }
877
878 #[cfg(not(feature = "parallel"))]
880 pub fn count_resources_parallel(
881 &self,
882 files: &[PathBuf],
883 graph_id: &str,
884 ) -> Vec<(PathBuf, usize)> {
885 files
886 .iter()
887 .filter_map(
888 |file| match self.fast_count_resources_in_file(file, graph_id) {
889 Ok(count) if count > 0 => Some((file.clone(), count)),
890 _ => None,
891 },
892 )
893 .collect()
894 }
895
896 pub fn find_preindex_files(&self, _graph_id: &str) -> Result<Vec<PathBuf>, LoaderError> {
902 let preindex_dir = self.root_path.join("preindex");
903 if !preindex_dir.exists() {
904 return Ok(Vec::new());
905 }
906
907 let mut files = Vec::new();
908 self.collect_pi_files(&preindex_dir, &mut files)?;
909 Ok(files)
910 }
911
912 #[allow(clippy::only_used_in_recursion)]
914 fn collect_pi_files(&self, dir: &Path, files: &mut Vec<PathBuf>) -> Result<(), LoaderError> {
915 for entry in fs::read_dir(dir)? {
916 let entry = entry?;
917 let path = entry.path();
918 if path.is_dir() {
919 self.collect_pi_files(&path, files)?;
920 } else if path.extension().map(|e| e == "pi").unwrap_or(false) {
921 files.push(path);
922 }
923 }
924 Ok(())
925 }
926
927 pub fn load_preindex_summaries(
930 &self,
931 graph_id: &str,
932 offset: usize,
933 limit: usize,
934 ) -> Result<(Vec<StaticResourceSummary>, bool), LoaderError> {
935 let files = self.find_preindex_files(graph_id)?;
936 let mut all_summaries = Vec::new();
937
938 for file in &files {
939 match self.load_preindex_file(file, graph_id) {
940 Ok(summaries) => all_summaries.extend(summaries),
941 Err(e) => {
942 eprintln!(
943 "Warning: Failed to load preindex from {}: {}",
944 file.display(),
945 e
946 );
947 }
948 }
949 }
950
951 let total = all_summaries.len();
953 let has_more = offset + limit < total;
954 let summaries: Vec<_> = all_summaries.into_iter().skip(offset).take(limit).collect();
955
956 Ok((summaries, has_more))
957 }
958
959 fn load_preindex_file(
961 &self,
962 path: &Path,
963 graph_id: &str,
964 ) -> Result<Vec<StaticResourceSummary>, LoaderError> {
965 let content = fs::read_to_string(path)?;
966 let mut summaries = Vec::new();
967
968 if let Ok(array) = serde_json::from_str::<Vec<StaticResourceSummary>>(&content) {
970 for summary in array {
971 if summary.graph_id == graph_id {
972 summaries.push(summary);
973 }
974 }
975 return Ok(summaries);
976 }
977
978 for line in content.lines() {
980 let line = line.trim();
981 if line.is_empty() {
982 continue;
983 }
984 if let Ok(summary) = serde_json::from_str::<StaticResourceSummary>(line) {
985 if summary.graph_id == graph_id {
986 summaries.push(summary);
987 }
988 }
989 }
990
991 Ok(summaries)
992 }
993
994 pub fn count_preindex_resources_for_graph(&self, graph_id: &str) -> Result<usize, LoaderError> {
996 let files = self.find_preindex_files(graph_id)?;
997 let mut count = 0;
998
999 for file in &files {
1000 if let Ok(summaries) = self.load_preindex_file(file, graph_id) {
1001 count += summaries.len();
1002 }
1003 }
1004
1005 Ok(count)
1006 }
1007}
1008
#[cfg(test)]
mod tests {
    use super::*;
    use crate::graph::StaticGraph;
    use std::path::PathBuf;

    /// A non-existent root must be rejected with `NotFound`.
    #[test]
    fn test_loader_not_found() {
        match PrebuildLoader::new("/nonexistent/path") {
            Err(LoaderError::NotFound(_)) => {}
            _ => panic!("expected LoaderError::NotFound"),
        }
    }

    /// A graph from the `Person.json` fixture parses even though it lacks
    /// the optional fields checked below.
    #[test]
    fn test_parse_coral_format_json() {
        // The fixture lives two directory levels above this crate's manifest.
        let crate_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
        let fixtures_root = crate_dir.parent().and_then(|p| p.parent()).unwrap();
        let test_path = fixtures_root.join("tests/data/models/Person.json");

        let content = std::fs::read_to_string(&test_path).expect("Failed to read test JSON file");
        let data: serde_json::Value = serde_json::from_str(&content).expect("Failed to parse JSON");

        let graph_json = &data["graph"][0];

        // The key is either missing or explicitly null in this format.
        assert!(graph_json
            .get("source_identifier_id")
            .map_or(true, |v| v.is_null()));

        let graph: StaticGraph = serde_json::from_value(graph_json.clone())
            .expect("Failed to parse StaticGraph from Coral format");

        assert!(!graph.graphid.is_empty());
        assert!(graph.source_identifier_id.is_none());
        assert!(graph.is_active.is_none());
        assert!(!graph.nodes.is_empty());
    }

    /// A graph carrying the extra Arches-HER fields parses and exposes them.
    #[test]
    fn test_parse_arches_her_format_json() {
        let json = r#"{
            "graphid": "test-graph-id",
            "name": {"en": "Test Graph"},
            "nodes": [],
            "edges": [],
            "nodegroups": [],
            "cards": [],
            "cards_x_nodes_x_widgets": [],
            "functions_x_graphs": [],
            "root": {
                "nodeid": "root-node-id",
                "name": "Root Node",
                "datatype": "semantic",
                "graph_id": "test-graph-id"
            },
            "source_identifier_id": "some-source-id",
            "is_active": true,
            "has_unpublished_changes": false,
            "is_copy_immutable": false
        }"#;

        let graph: StaticGraph =
            serde_json::from_str(json).expect("Failed to parse StaticGraph with Arches-HER fields");

        assert_eq!(graph.graphid, "test-graph-id");
        assert_eq!(
            graph.source_identifier_id,
            Some("some-source-id".to_string())
        );
        assert_eq!(graph.is_active, Some(true));
        assert_eq!(graph.has_unpublished_changes, Some(false));
    }
}
1090
1091pub fn parse_business_data_bytes(bytes: &[u8]) -> Result<Vec<StaticResource>, LoaderError> {
1106 if let Ok(file_data) = serde_json::from_slice::<BusinessDataFileFull>(bytes) {
1107 Ok(file_data
1108 .business_data
1109 .resources
1110 .into_iter()
1111 .map(|r| r.to_static_resource())
1112 .collect())
1113 } else {
1114 let resource: BusinessDataResourceFull = serde_json::from_slice(bytes)?;
1115 Ok(vec![resource.to_static_resource()])
1116 }
1117}
1118
/// Everything produced by [`import_prebuild`].
pub struct ImportPrebuildResult {
    /// Ids of the graphs that were loaded and registered.
    pub graph_ids: Vec<String>,
    /// Ids returned when collections were added to the global RDM cache,
    /// including those from extra reference-data directories.
    pub collection_ids: Vec<String>,
    /// Collections loaded from the prebuild's own `reference_data` directory.
    pub collections: Vec<SkosCollection>,
    /// Validators built from the ontology XML; empty when no XML was found.
    pub ontology_validators: Vec<OntologyValidator>,
    /// Configs of the ontology directories found in the prebuild.
    pub ontology_configs: Vec<OntologyConfig>,
}
1127
1128pub fn load_collections_from_dir(
1134 dir: &str,
1135 base_uri: &str,
1136) -> Result<Vec<SkosCollection>, LoaderError> {
1137 let dir_path = Path::new(dir);
1138 if !dir_path.is_dir() {
1139 return Ok(Vec::new());
1140 }
1141
1142 let mut files: Vec<PathBuf> = Vec::new();
1143 for entry in fs::read_dir(dir_path)? {
1144 let entry = entry?;
1145 let path = entry.path();
1146 let ext = path.extension().and_then(|e| e.to_str());
1147 if ext == Some("xml") || ext == Some("json") {
1148 files.push(path);
1149 }
1150 }
1151 files.sort();
1152
1153 let mut collections = Vec::new();
1154 let mut seen_ids: std::collections::HashSet<String> = std::collections::HashSet::new();
1155
1156 for file in &files {
1157 let content = fs::read_to_string(file)?;
1158 let ext = file.extension().and_then(|e| e.to_str());
1159
1160 let parsed: Vec<SkosCollection> = match ext {
1161 Some("xml") => match parse_skos_to_collections(&content, base_uri) {
1162 Ok(p) => p,
1163 Err(e) => {
1164 eprintln!(
1165 "Warning: Failed to parse XML collection {}: {}",
1166 file.display(),
1167 e
1168 );
1169 continue;
1170 }
1171 },
1172 Some("json") => {
1173 if let Ok(coll) = serde_json::from_str::<SkosCollection>(&content) {
1174 vec![coll]
1175 } else if let Ok(colls) = serde_json::from_str::<Vec<SkosCollection>>(&content) {
1176 colls
1177 } else {
1178 eprintln!(
1179 "Warning: Failed to parse JSON collection {}: not a valid SkosCollection",
1180 file.display(),
1181 );
1182 continue;
1183 }
1184 }
1185 _ => continue,
1186 };
1187
1188 for coll in parsed {
1189 if seen_ids.insert(coll.id.clone()) {
1190 collections.push(coll);
1191 }
1192 }
1193 }
1194
1195 Ok(collections)
1196}
1197
1198pub fn load_ontology_xml_from_dir(dir: &str) -> Result<Vec<String>, LoaderError> {
1203 let dir_path = Path::new(dir);
1204 if !dir_path.is_dir() {
1205 return Ok(Vec::new());
1206 }
1207
1208 let mut files: Vec<PathBuf> = Vec::new();
1209 for entry in fs::read_dir(dir_path)? {
1210 let entry = entry?;
1211 let path = entry.path();
1212 if path.extension().and_then(|e| e.to_str()) == Some("xml") {
1213 files.push(path);
1214 }
1215 }
1216 files.sort();
1217
1218 let mut contents = Vec::new();
1219 for file in &files {
1220 contents.push(fs::read_to_string(file).map_err(|e| {
1221 LoaderError::IoError(std::io::Error::new(
1222 e.kind(),
1223 format!("Failed to read ontology file {}: {}", file.display(), e),
1224 ))
1225 })?);
1226 }
1227 Ok(contents)
1228}
1229
1230pub fn import_prebuild(
1239 path: &str,
1240 base_uri: &str,
1241 extra_reference_data_dirs: Option<&[&str]>,
1242 extra_ontology_dirs: Option<&[&str]>,
1243) -> Result<ImportPrebuildResult, LoaderError> {
1244 crate::set_rdm_namespace(base_uri)
1246 .map_err(|e| LoaderError::Other(format!("Failed to set RDM namespace: {}", e)))?;
1247
1248 let loader = PrebuildLoader::new(path)?;
1249
1250 let graphs = loader.load_all_graphs()?;
1252 let graph_ids: Vec<String> = graphs
1253 .into_iter()
1254 .map(|g| {
1255 let id = g.graphid.clone();
1256 crate::register_graph_owned(g);
1257 id
1258 })
1259 .collect();
1260
1261 let collections = loader.load_collections(base_uri)?;
1263 let mut collection_ids = crate::add_to_global_rdm_cache_from_skos(&collections);
1264
1265 if let Some(dirs) = extra_reference_data_dirs {
1267 for dir in dirs {
1268 let extra_collections = load_collections_from_dir(dir, base_uri)?;
1269 let extra_ids = crate::add_to_global_rdm_cache_from_skos(&extra_collections);
1270 collection_ids.extend(extra_ids);
1271 }
1272 }
1273
1274 let ontology_dirs = loader.find_ontology_dirs()?;
1276 let mut all_xml_contents = Vec::new();
1277 let mut ontology_configs = Vec::new();
1278 for dir in &ontology_dirs {
1279 ontology_configs.push(loader.load_ontology_config(dir)?);
1280 all_xml_contents.extend(loader.collect_ontology_xml_contents(dir)?);
1281 }
1282
1283 if let Some(extra_dirs) = extra_ontology_dirs {
1285 for dir in extra_dirs {
1286 all_xml_contents.extend(load_ontology_xml_from_dir(dir)?);
1287 }
1288 }
1289
1290 let mut ontology_validators = Vec::new();
1292 if !all_xml_contents.is_empty() {
1293 let refs: Vec<&str> = all_xml_contents.iter().map(|s| s.as_str()).collect();
1294 let validator = OntologyValidator::from_rdfs_xml(&refs)
1295 .map_err(|e| LoaderError::GraphError(e.to_string()))?;
1296 ontology_validators.push(validator);
1297 }
1298
1299 Ok(ImportPrebuildResult {
1300 graph_ids,
1301 collection_ids,
1302 collections,
1303 ontology_validators,
1304 ontology_configs,
1305 })
1306}