Skip to main content

alizarin_core/
rdm_cache.rs

1//! Core RDM Cache for concept lookups
2//!
3//! This module provides a platform-agnostic cache for Reference Data Manager (RDM) collections,
4//! enabling UUID -> label lookups for concept and concept-list datatypes.
5//!
6//! The WASM bindings (alizarin-wasm) wrap this with WasmRdmCache for JavaScript interop.
7
8use serde::{Deserialize, Deserializer, Serialize};
9use std::collections::{HashMap, HashSet};
10
11use crate::rdm_namespace::generate_value_uuid;
12
13// =============================================================================
14// RDM Value (label with its own ID)
15// =============================================================================
16
17/// A label value with its own ID (for StaticValue compatibility)
18#[derive(Debug, Clone, Serialize, Deserialize)]
19pub struct RdmValue {
20    /// Value ID (UUID) - unique identifier for this specific label
21    pub id: String,
22    /// The label text
23    pub value: String,
24    /// Back-reference to concept ID (not serialized, set during indexing)
25    #[serde(skip)]
26    pub concept_id: String,
27    /// Language code (not serialized, set during indexing)
28    #[serde(skip)]
29    pub language: String,
30}
31
32impl RdmValue {
33    /// Create a new RdmValue
34    pub fn new(id: String, value: String) -> Self {
35        Self {
36            id,
37            value,
38            concept_id: String::new(),
39            language: String::new(),
40        }
41    }
42
43    /// Create a new RdmValue with back-references
44    pub fn with_context(id: String, value: String, concept_id: String, language: String) -> Self {
45        Self {
46            id,
47            value,
48            concept_id,
49            language,
50        }
51    }
52
53    /// Generate a deterministic value ID from concept info
54    /// Uses UUID5 with namespace "value" and path: "{concept_id}/prefLabel/{value}/{language}"
55    ///
56    /// Delegates to `rdm_namespace::generate_value_uuid` for the actual generation.
57    pub fn generate_id(concept_id: &str, value: &str, language: &str) -> String {
58        generate_value_uuid(concept_id, value, language).to_string()
59    }
60}
61
/// Intermediate type for deserializing a single `pref_label` entry, which can be either:
/// - A simple string: "Label"
/// - A value object: { "id": "...", "value": "Label" }
///
/// `#[serde(untagged)]` makes serde try the variants in declaration order, so
/// a plain string becomes `Simple` and an object with `id`/`value` becomes `WithId`.
#[derive(Debug, Clone, Deserialize)]
#[serde(untagged)]
enum PrefLabelEntry {
    /// Label text only; the value ID has to be supplied later.
    Simple(String),
    /// Label text together with its explicit value ID.
    WithId { id: String, value: String },
}
71
72// =============================================================================
73// RDM Concept
74// =============================================================================
75
/// A concept from an RDM collection
///
/// Every field except `id` falls back to its `Default` when absent from the
/// input (`#[serde(default)]`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RdmConcept {
    /// Concept ID (UUID)
    pub id: String,
    /// Preferred labels by language code (with value IDs)
    ///
    /// On input the key may also be spelled `prefLabel` or `prefLabels`, and each
    /// entry may be a bare string or an `{id, value}` object
    /// (see `deserialize_pref_labels`).
    #[serde(
        default,
        alias = "prefLabel",
        alias = "prefLabels",
        deserialize_with = "deserialize_pref_labels"
    )]
    pub pref_label: HashMap<String, RdmValue>,
    /// Alternative labels by language code (serialized under the key `altLabels`)
    #[serde(default, rename = "altLabels")]
    pub alt_labels: HashMap<String, Vec<String>>,
    /// Broader concepts (parent IDs)
    #[serde(default)]
    pub broader: Vec<String>,
    /// Narrower concepts (child IDs)
    #[serde(default)]
    pub narrower: Vec<String>,
    /// Scope notes by language (serialized under the key `scopeNote`)
    #[serde(default, rename = "scopeNote")]
    pub scope_note: HashMap<String, String>,
}
102
103/// Custom deserializer that handles both JSON formats for pref_label:
104/// - Simple: { "en": "Label" }
105/// - WithId: { "en": { "id": "...", "value": "Label" } }
106fn deserialize_pref_labels<'de, D>(deserializer: D) -> Result<HashMap<String, RdmValue>, D::Error>
107where
108    D: Deserializer<'de>,
109{
110    let raw: HashMap<String, PrefLabelEntry> = HashMap::deserialize(deserializer)?;
111    let mut result = HashMap::new();
112
113    for (lang, entry) in raw {
114        let value = match entry {
115            PrefLabelEntry::Simple(text) => {
116                // Generate a placeholder ID - will be replaced during indexing
117                RdmValue::new("__pending__".to_string(), text)
118            }
119            PrefLabelEntry::WithId { id, value } => RdmValue::new(id, value),
120        };
121        result.insert(lang, value);
122    }
123
124    Ok(result)
125}
126
127impl RdmConcept {
128    /// Get the preferred label for a language, with fallbacks
129    pub fn get_label(&self, language: &str) -> Option<String> {
130        self.pref_label
131            .get(language)
132            .or_else(|| self.pref_label.get("en"))
133            .or_else(|| self.pref_label.values().next())
134            .map(|v| v.value.clone())
135    }
136
137    /// Get the RdmValue for a language, with fallbacks
138    pub fn get_value(&self, language: &str) -> Option<&RdmValue> {
139        self.pref_label
140            .get(language)
141            .or_else(|| self.pref_label.get("en"))
142            .or_else(|| self.pref_label.values().next())
143    }
144}
145
146// =============================================================================
147// RDM Collection
148// =============================================================================
149
150/// A collection of RDM concepts
151#[derive(Debug, Clone, Default)]
152pub struct RdmCollection {
153    /// Collection ID
154    pub id: String,
155    /// Collection name (optional, for display)
156    pub name: Option<String>,
157    /// Concepts indexed by their ID
158    concepts: HashMap<String, RdmConcept>,
159    /// Top-level concepts (no broader)
160    top_concepts: Vec<String>,
161    /// Index from VALUE ID to (concept_id, language) for fast lookup
162    value_index: HashMap<String, (String, String)>,
163}
164
165impl RdmCollection {
166    pub fn new(id: String) -> Self {
167        Self {
168            id,
169            name: None,
170            concepts: HashMap::new(),
171            top_concepts: vec![],
172            value_index: HashMap::new(),
173        }
174    }
175
176    /// Create a new collection with a name
177    pub fn with_name(id: String, name: String) -> Self {
178        Self {
179            id,
180            name: Some(name),
181            concepts: HashMap::new(),
182            top_concepts: vec![],
183            value_index: HashMap::new(),
184        }
185    }
186
187    /// Add a concept to the collection
188    ///
189    /// This also builds the value index for all labels in the concept.
190    /// If a label has a placeholder ID ("__pending__"), a deterministic ID is generated.
191    pub fn add_concept(&mut self, mut concept: RdmConcept) {
192        let concept_id = concept.id.clone();
193
194        // Process each label: set back-references and build value index
195        for (lang, value) in concept.pref_label.iter_mut() {
196            // Generate ID if it's a placeholder
197            if value.id == "__pending__" {
198                value.id = RdmValue::generate_id(&concept_id, &value.value, lang);
199            }
200
201            // Set back-references
202            value.concept_id = concept_id.clone();
203            value.language = lang.clone();
204
205            // Add to value index
206            self.value_index
207                .insert(value.id.clone(), (concept_id.clone(), lang.clone()));
208        }
209
210        if concept.broader.is_empty() {
211            self.top_concepts.push(concept_id.clone());
212        }
213        self.concepts.insert(concept_id, concept);
214    }
215
216    /// Get top-level concepts (no broader)
217    pub fn get_top_concepts(&self) -> Vec<&RdmConcept> {
218        self.top_concepts
219            .iter()
220            .filter_map(|id| self.concepts.get(id))
221            .collect()
222    }
223
224    /// Get a concept by ID
225    pub fn get_concept(&self, concept_id: &str) -> Option<&RdmConcept> {
226        self.concepts.get(concept_id)
227    }
228
229    /// Get a mutable concept by ID
230    pub fn get_concept_mut(&mut self, concept_id: &str) -> Option<&mut RdmConcept> {
231        self.concepts.get_mut(concept_id)
232    }
233
234    /// Get the label for a concept in this collection
235    pub fn get_label(&self, concept_id: &str, language: &str) -> Option<String> {
236        self.get_concept(concept_id)
237            .and_then(|c| c.get_label(language))
238    }
239
240    /// Parse collection from JSON array of concepts
241    pub fn from_concepts_json(id: String, json: &str) -> Result<Self, String> {
242        let concepts: Vec<RdmConcept> = serde_json::from_str(json)
243            .map_err(|e| format!("Failed to parse concepts JSON: {}", e))?;
244
245        let mut collection = Self::new(id);
246        for concept in concepts {
247            collection.add_concept(concept);
248        }
249        Ok(collection)
250    }
251
252    /// Get the number of concepts in this collection
253    pub fn len(&self) -> usize {
254        self.concepts.len()
255    }
256
257    /// Check if the collection is empty
258    pub fn is_empty(&self) -> bool {
259        self.concepts.is_empty()
260    }
261
262    /// Check if a concept exists in the collection
263    pub fn has_concept(&self, concept_id: &str) -> bool {
264        self.concepts.contains_key(concept_id)
265    }
266
267    /// Get all concept IDs
268    pub fn get_concept_ids(&self) -> Vec<&String> {
269        self.concepts.keys().collect()
270    }
271
272    /// Get the first parent ID for a concept (from broader field)
273    ///
274    /// Returns None if the concept doesn't exist or has no parent (top-level concept).
275    /// SKOS concepts can have multiple parents; this returns only the first one.
276    pub fn get_parent_id(&self, concept_id: &str) -> Option<String> {
277        self.get_concept(concept_id)
278            .and_then(|c| c.broader.first().cloned())
279    }
280
281    // =========================================================================
282    // Value ID Lookups (for StaticValue compatibility)
283    // =========================================================================
284
285    /// Look up a value by its VALUE ID
286    ///
287    /// This is the primary lookup method used by ViewModels.
288    /// Returns None if the value ID is not found in this collection.
289    pub fn get_value_by_id(&self, value_id: &str) -> Option<&RdmValue> {
290        self.value_index
291            .get(value_id)
292            .and_then(|(concept_id, lang)| {
293                self.concepts
294                    .get(concept_id)
295                    .and_then(|c| c.pref_label.get(lang))
296            })
297    }
298
299    /// Get concept ID from value ID
300    ///
301    /// Returns the concept ID that contains the given value ID.
302    pub fn get_concept_id_for_value(&self, value_id: &str) -> Option<&str> {
303        self.value_index
304            .get(value_id)
305            .map(|(concept_id, _)| concept_id.as_str())
306    }
307
308    /// Check if a value ID exists in this collection
309    pub fn has_value(&self, value_id: &str) -> bool {
310        self.value_index.contains_key(value_id)
311    }
312
313    /// Get all value IDs in this collection
314    pub fn get_value_ids(&self) -> Vec<&String> {
315        self.value_index.keys().collect()
316    }
317
318    // =========================================================================
319    // Label-based Lookups
320    // =========================================================================
321
322    /// Find a concept by exact label match (case-insensitive)
323    ///
324    /// Searches pref_label and alt_labels across all languages.
325    /// Returns the first match if found. When multiple concepts share the same
326    /// label (common in hierarchical collections like administrative areas),
327    /// a deterministic result is returned by selecting the concept with the
328    /// lexicographically smallest ID.
329    pub fn find_by_label(&self, label: &str) -> Option<&RdmConcept> {
330        let label_lower = label.trim().to_lowercase();
331        let mut matches: Vec<_> = self
332            .concepts
333            .values()
334            .filter(|c| {
335                // Check pref_label in any language (trim stored values too)
336                c.pref_label.values().any(|p| p.value.trim().to_lowercase() == label_lower) ||
337                // Check alt_labels in any language
338                c.alt_labels.values().any(|alts|
339                    alts.iter().any(|l| l.trim().to_lowercase() == label_lower)
340                )
341            })
342            .collect();
343
344        // Sort by ID for deterministic results when multiple concepts share a label
345        matches.sort_by(|a, b| a.id.cmp(&b.id));
346        matches.into_iter().next()
347    }
348
349    /// Find all concepts by exact label match (case-insensitive)
350    pub fn find_all_by_label(&self, label: &str) -> Vec<&RdmConcept> {
351        let label_lower = label.trim().to_lowercase();
352        self.concepts
353            .values()
354            .filter(|c| {
355                c.pref_label
356                    .values()
357                    .any(|p| p.value.trim().to_lowercase() == label_lower)
358                    || c.alt_labels
359                        .values()
360                        .any(|alts| alts.iter().any(|l| l.trim().to_lowercase() == label_lower))
361            })
362            .collect()
363    }
364
365    /// Search concepts by label prefix (case-insensitive)
366    pub fn search(&self, query: &str, language: Option<&str>) -> Vec<&RdmConcept> {
367        let lang = language.unwrap_or("en");
368        let query_lower = query.to_lowercase();
369
370        self.concepts
371            .values()
372            .filter(|c| {
373                // Check pref_label
374                if let Some(label) = c.pref_label.get(lang) {
375                    if label.value.to_lowercase().starts_with(&query_lower) {
376                        return true;
377                    }
378                }
379                // Check alt_labels
380                if let Some(alts) = c.alt_labels.get(lang) {
381                    if alts
382                        .iter()
383                        .any(|l| l.to_lowercase().starts_with(&query_lower))
384                    {
385                        return true;
386                    }
387                }
388                false
389            })
390            .collect()
391    }
392}
393
394// =============================================================================
395// RDM Cache
396// =============================================================================
397
398/// Cache for RDM collections, enabling concept UUID -> label lookups
399#[derive(Debug, Clone, Default)]
400pub struct RdmCache {
401    collections: HashMap<String, RdmCollection>,
402}
403
404impl RdmCache {
405    /// Create a new empty cache
406    pub fn new() -> Self {
407        Self {
408            collections: HashMap::new(),
409        }
410    }
411
412    /// Add a collection from JSON
413    ///
414    /// @param collection_id - The collection identifier
415    /// @param concepts_json - JSON array of concepts with {id, prefLabel: {lang: label}}
416    pub fn add_collection_from_json(
417        &mut self,
418        collection_id: &str,
419        concepts_json: &str,
420    ) -> Result<(), String> {
421        let collection =
422            RdmCollection::from_concepts_json(collection_id.to_string(), concepts_json)?;
423
424        self.collections
425            .insert(collection_id.to_string(), collection);
426        Ok(())
427    }
428
429    /// Add a collection, merging with any existing collection that has the same ID.
430    ///
431    /// When merging:
432    /// - New concepts with labels always win over existing bare stubs (no labels).
433    /// - Existing concepts with labels are kept if the incoming concept is bare.
434    /// - If both have labels, the incoming concept wins (last-writer-wins).
435    /// - New concepts that don't exist yet are always added.
436    pub fn add_collection(&mut self, collection: RdmCollection) {
437        if let Some(existing) = self.collections.get_mut(&collection.id) {
438            // Merge: iterate over incoming concepts
439            for (concept_id, incoming_concept) in collection.concepts {
440                let incoming_has_labels = !incoming_concept.pref_label.is_empty();
441                let should_insert = match existing.concepts.get(&concept_id) {
442                    None => true, // New concept — always add
443                    Some(existing_concept) => {
444                        let existing_has_labels = !existing_concept.pref_label.is_empty();
445                        // Replace if incoming has labels, or if existing is also bare
446                        incoming_has_labels || !existing_has_labels
447                    }
448                };
449                if should_insert {
450                    existing.add_concept(incoming_concept);
451                }
452            }
453            // Merge top_concepts (deduplicated by add_concept)
454            // Update name if incoming has one
455            if collection.name.is_some() {
456                existing.name = collection.name;
457            }
458        } else {
459            self.collections.insert(collection.id.clone(), collection);
460        }
461    }
462
463    /// Check if a collection is loaded
464    pub fn has_collection(&self, collection_id: &str) -> bool {
465        self.collections.contains_key(collection_id)
466    }
467
468    /// Get all loaded collection IDs
469    pub fn get_collection_ids(&self) -> Vec<String> {
470        self.collections.keys().cloned().collect()
471    }
472
473    /// Look up the label for a concept
474    ///
475    /// @param collection_id - The collection to search in
476    /// @param concept_id - The concept UUID
477    /// @param language - The language code (e.g., "en")
478    /// @returns The label string, or None if not found
479    pub fn lookup_label(
480        &self,
481        collection_id: &str,
482        concept_id: &str,
483        language: &str,
484    ) -> Option<String> {
485        self.collections
486            .get(collection_id)
487            .and_then(|c| c.get_label(concept_id, language))
488    }
489
490    /// Look up full concept info
491    pub fn lookup_concept(&self, collection_id: &str, concept_id: &str) -> Option<&RdmConcept> {
492        self.collections
493            .get(collection_id)
494            .and_then(|c| c.get_concept(concept_id))
495    }
496
497    /// Get the first parent ID for a concept
498    ///
499    /// Returns None if the collection doesn't exist, concept doesn't exist,
500    /// or concept has no parent (top-level concept).
501    pub fn get_parent_id(&self, collection_id: &str, concept_id: &str) -> Option<String> {
502        self.collections
503            .get(collection_id)
504            .and_then(|c| c.get_parent_id(concept_id))
505    }
506
507    // =========================================================================
508    // Value ID Lookups (for StaticValue compatibility)
509    // =========================================================================
510
511    /// Look up a value by its VALUE ID
512    ///
513    /// This is the primary lookup method used by ViewModels.
514    /// Returns None if the collection or value ID is not found.
515    pub fn lookup_value(&self, collection_id: &str, value_id: &str) -> Option<&RdmValue> {
516        self.collections
517            .get(collection_id)
518            .and_then(|c| c.get_value_by_id(value_id))
519    }
520
521    /// Get concept ID from value ID
522    ///
523    /// Returns the concept ID that contains the given value ID.
524    pub fn get_concept_id_for_value(&self, collection_id: &str, value_id: &str) -> Option<&str> {
525        self.collections
526            .get(collection_id)
527            .and_then(|c| c.get_concept_id_for_value(value_id))
528    }
529
530    /// Validate that a value exists in a collection
531    pub fn validate_value(&self, collection_id: &str, value_id: &str) -> bool {
532        self.collections
533            .get(collection_id)
534            .map(|c| c.has_value(value_id))
535            .unwrap_or(false)
536    }
537
538    /// Get a collection by ID
539    pub fn get_collection(&self, collection_id: &str) -> Option<&RdmCollection> {
540        self.collections.get(collection_id)
541    }
542
543    /// Get a mutable reference to a collection by ID
544    pub fn get_collection_mut(&mut self, collection_id: &str) -> Option<&mut RdmCollection> {
545        self.collections.get_mut(collection_id)
546    }
547
548    /// Clear all cached collections
549    pub fn clear(&mut self) {
550        self.collections.clear();
551    }
552
553    /// Remove a specific collection from the cache
554    pub fn remove_collection(&mut self, collection_id: &str) -> bool {
555        self.collections.remove(collection_id).is_some()
556    }
557
558    /// Get the number of cached collections
559    pub fn len(&self) -> usize {
560        self.collections.len()
561    }
562
563    /// Check if the cache is empty
564    pub fn is_empty(&self) -> bool {
565        self.collections.is_empty()
566    }
567
568    /// Validate that a concept exists in a collection
569    pub fn validate_concept(&self, collection_id: &str, concept_id: &str) -> bool {
570        self.collections
571            .get(collection_id)
572            .map(|c| c.has_concept(concept_id))
573            .unwrap_or(false)
574    }
575
576    /// Look up a concept by label in a specific collection
577    ///
578    /// Returns the concept if exactly one match is found.
579    /// Returns None if no match or ambiguous (multiple matches).
580    pub fn lookup_by_label(&self, collection_id: &str, label: &str) -> Option<&RdmConcept> {
581        self.collections
582            .get(collection_id)
583            .and_then(|c| c.find_by_label(label))
584    }
585
586    /// Look up a concept by label, returning all matches
587    pub fn lookup_all_by_label(&self, collection_id: &str, label: &str) -> Vec<&RdmConcept> {
588        self.collections
589            .get(collection_id)
590            .map(|c| c.find_all_by_label(label))
591            .unwrap_or_default()
592    }
593
594    /// Search across all collections (for autocomplete)
595    pub fn search_all(&self, query: &str, language: Option<&str>) -> Vec<(&str, &RdmConcept)> {
596        self.collections
597            .iter()
598            .flat_map(|(coll_id, collection)| {
599                collection
600                    .search(query, language)
601                    .into_iter()
602                    .map(move |c| (coll_id.as_str(), c))
603            })
604            .collect()
605    }
606}
607
608// =============================================================================
609// SKOS → RDM Conversion
610// =============================================================================
611
612use crate::skos::{SkosCollection, SkosConcept, SkosNodeType, SkosValue};
613
614impl RdmCache {
615    /// Convert a `SkosCollection` to an `RdmCollection` and add it to the cache.
616    ///
617    /// Returns the collection ID.
618    pub fn add_from_skos_collection(&mut self, skos: &SkosCollection) -> String {
619        let rdm = skos_to_rdm_collection(skos);
620        let id = rdm.id.clone();
621        self.add_collection(rdm);
622        id
623    }
624
625    /// Add multiple SKOS collections to the cache.
626    ///
627    /// After adding, enriches any existing collections that have bare concept stubs
628    /// (concepts with no labels) if the newly-added data provides labels for those
629    /// concept IDs. This handles the common Arches pattern where collections.xml and
630    /// concepts.xml are separate files.
631    ///
632    /// Returns the list of collection IDs added.
633    pub fn add_from_skos_collections(&mut self, collections: &[SkosCollection]) -> Vec<String> {
634        let added_ids: Vec<String> = collections
635            .iter()
636            .map(|skos| self.add_from_skos_collection(skos))
637            .collect();
638
639        // Cross-collection enrichment: if any existing collection has bare concepts
640        // (no labels), and a newly-added collection has the same concept ID with labels,
641        // copy the labels across.
642        self.enrich_bare_concepts(&added_ids);
643
644        added_ids
645    }
646
647    /// Enrich bare concepts across all collections.
648    ///
649    /// A "bare concept" is one that exists in a collection (as a member reference)
650    /// but has no pref_labels — typically because it was loaded from a collections.xml
651    /// that only declared member URIs without inline concept definitions.
652    ///
653    /// This checks bidirectionally: newly-added collections can enrich existing ones,
654    /// and existing collections can enrich newly-added ones.
655    fn enrich_bare_concepts(&mut self, newly_added_ids: &[String]) {
656        // Build a global lookup of concept_id -> labels from ALL collections
657        let mut concept_labels: HashMap<String, HashMap<String, RdmValue>> = HashMap::new();
658
659        for coll in self.collections.values() {
660            for (concept_id, concept) in &coll.concepts {
661                if !concept.pref_label.is_empty() {
662                    concept_labels
663                        .entry(concept_id.clone())
664                        .or_insert_with(|| concept.pref_label.clone());
665                }
666            }
667        }
668
669        if concept_labels.is_empty() {
670            return;
671        }
672
673        // Find and enrich bare concepts in all collections that were involved
674        // (either newly added or existing ones that reference newly-added concepts)
675        let all_collection_ids: Vec<String> = self.collections.keys().cloned().collect();
676        for coll_id in &all_collection_ids {
677            let needs_enrichment: Vec<String> = {
678                if let Some(coll) = self.collections.get(coll_id) {
679                    coll.concepts
680                        .iter()
681                        .filter(|(_, concept)| concept.pref_label.is_empty())
682                        .filter(|(id, _)| concept_labels.contains_key(*id))
683                        .map(|(id, _)| id.clone())
684                        .collect()
685                } else {
686                    vec![]
687                }
688            };
689
690            if needs_enrichment.is_empty() {
691                continue;
692            }
693
694            // Only enrich if this collection is newly added OR if it references
695            // concepts from a newly-added collection
696            let dominated_by_new = newly_added_ids.contains(coll_id)
697                || needs_enrichment
698                    .iter()
699                    .any(|cid| self.concept_in_collections(cid, newly_added_ids));
700
701            if !dominated_by_new {
702                continue;
703            }
704
705            if let Some(coll) = self.collections.get_mut(coll_id) {
706                for concept_id in needs_enrichment {
707                    if let Some(labels) = concept_labels.get(&concept_id) {
708                        // Build value index entries first
709                        let mut new_index_entries: Vec<(String, String, String)> = Vec::new();
710                        let mut enriched_labels = labels.clone();
711                        for (lang, value) in enriched_labels.iter_mut() {
712                            value.concept_id = concept_id.clone();
713                            value.language = lang.clone();
714                            new_index_entries.push((
715                                value.id.clone(),
716                                concept_id.clone(),
717                                lang.clone(),
718                            ));
719                        }
720
721                        // Apply to concept
722                        if let Some(concept) = coll.get_concept_mut(&concept_id) {
723                            concept.pref_label = enriched_labels;
724                        }
725
726                        // Update value index
727                        for (value_id, cid, lang) in new_index_entries {
728                            coll.value_index.insert(value_id, (cid, lang));
729                        }
730                    }
731                }
732            }
733        }
734    }
735
736    /// Check if a concept ID exists (with labels) in any of the specified collections.
737    fn concept_in_collections(&self, concept_id: &str, collection_ids: &[String]) -> bool {
738        collection_ids.iter().any(|coll_id| {
739            self.collections
740                .get(coll_id)
741                .and_then(|c| c.get_concept(concept_id))
742                .map(|concept| !concept.pref_label.is_empty())
743                .unwrap_or(false)
744        })
745    }
746}
747
748/// Convert a `SkosCollection` (parsed from SKOS XML or JSON) to an `RdmCollection`
749/// suitable for label lookups.
750///
751/// Walks the hierarchical `concepts` tree recursively, setting broader/narrower
752/// relationships. Falls back to `all_concepts` for flat structures.
753pub fn skos_to_rdm_collection(skos: &SkosCollection) -> RdmCollection {
754    let mut rdm = RdmCollection::with_name(
755        skos.id.clone(),
756        skos.pref_labels
757            .get("en")
758            .map(|v| v.value.clone())
759            .unwrap_or_else(|| skos.id.clone()),
760    );
761
762    fn add_concept_recursive(
763        rdm: &mut RdmCollection,
764        skos_concept: &SkosConcept,
765        parent_id: Option<&str>,
766    ) {
767        let mut pref_label: HashMap<String, RdmValue> = HashMap::new();
768        for (lang, skos_value) in &skos_concept.pref_labels {
769            pref_label.insert(
770                lang.clone(),
771                RdmValue::new(skos_value.id.clone(), skos_value.value.clone()),
772            );
773        }
774
775        let narrower: Vec<String> = skos_concept
776            .children
777            .as_ref()
778            .map(|children| children.iter().map(|c| c.id.clone()).collect())
779            .unwrap_or_default();
780
781        let broader = parent_id.map(|p| vec![p.to_string()]).unwrap_or_default();
782
783        let rdm_concept = RdmConcept {
784            id: skos_concept.id.clone(),
785            pref_label,
786            alt_labels: HashMap::new(),
787            broader,
788            narrower,
789            scope_note: HashMap::new(),
790        };
791
792        rdm.add_concept(rdm_concept);
793
794        if let Some(ref children) = skos_concept.children {
795            for child in children {
796                add_concept_recursive(rdm, child, Some(&skos_concept.id));
797            }
798        }
799    }
800
801    for skos_concept in skos.concepts.values() {
802        add_concept_recursive(&mut rdm, skos_concept, None);
803    }
804
805    // Fallback for flat structures (all_concepts without hierarchy)
806    if rdm.is_empty() && !skos.all_concepts.is_empty() {
807        for skos_concept in skos.all_concepts.values() {
808            if !rdm.has_concept(&skos_concept.id) {
809                add_concept_recursive(&mut rdm, skos_concept, None);
810            }
811        }
812    }
813
814    rdm
815}
816
817/// Convert an `RdmCollection` to a `SkosCollection` for SKOS XML serialization.
818///
819/// This is the inverse of `skos_to_rdm_collection`. The `node_type` parameter
820/// determines whether the output uses `skos:ConceptScheme` (with narrower/broader)
821/// or `skos:Collection` (with member relations, Arches-compatible).
822pub fn rdm_to_skos_collection(rdm: &RdmCollection, node_type: &str) -> SkosCollection {
823    rdm_to_skos_collection_excluding(rdm, node_type, &HashSet::new())
824}
825
826/// Convert an [`RdmCollection`] to a [`SkosCollection`], excluding any concept
827/// IDs in `exclude_ids`. This is used during export to avoid emitting the same
828/// concept in multiple XML files (which causes duplicate-key errors in the
829/// `arches_controlled_lists` importer).
830pub fn rdm_to_skos_collection_excluding(
831    rdm: &RdmCollection,
832    node_type: &str,
833    exclude_ids: &HashSet<String>,
834) -> SkosCollection {
835    // Build collection pref_labels
836    let mut collection_pref_labels = HashMap::new();
837    if let Some(ref name) = rdm.name {
838        collection_pref_labels.insert(
839            "en".to_string(),
840            SkosValue {
841                id: generate_value_uuid(&rdm.id, name, "en").to_string(),
842                value: name.clone(),
843            },
844        );
845    }
846
847    // Convert all concepts (flat list first), skipping excluded IDs
848    let mut all_skos_concepts: HashMap<String, SkosConcept> = HashMap::new();
849    let mut all_narrower_ids: HashSet<String> = HashSet::new();
850
851    for concept_id in rdm.get_concept_ids() {
852        if exclude_ids.contains(concept_id.as_str()) {
853            continue;
854        }
855        if let Some(rdm_concept) = rdm.get_concept(concept_id) {
856            let mut pref_labels = HashMap::new();
857            for (lang, rdm_value) in &rdm_concept.pref_label {
858                let value_id = if rdm_value.id.is_empty() || rdm_value.id == "__pending__" {
859                    generate_value_uuid(concept_id, &rdm_value.value, lang).to_string()
860                } else {
861                    rdm_value.id.clone()
862                };
863                pref_labels.insert(
864                    lang.clone(),
865                    SkosValue {
866                        id: value_id,
867                        value: rdm_value.value.clone(),
868                    },
869                );
870            }
871
872            let skos_concept = SkosConcept {
873                id: concept_id.clone(),
874                uri: None,
875                pref_labels,
876                source: Some(concept_id.clone()),
877                sort_order: None,
878                children: None,
879            };
880
881            all_skos_concepts.insert(concept_id.clone(), skos_concept);
882            all_narrower_ids.extend(rdm_concept.narrower.iter().cloned());
883        }
884    }
885
886    let skos_node_type = if node_type == "Collection" {
887        SkosNodeType::Collection
888    } else {
889        SkosNodeType::ConceptScheme
890    };
891
892    // Build hierarchy — top-level concepts are those not in any narrower list.
893    // `placed` tracks concepts already claimed by a parent to avoid emitting
894    // the same concept under multiple parents (diamond hierarchies).
895    let mut hierarchy: HashMap<String, SkosConcept> = HashMap::new();
896    let mut placed: HashSet<String> = HashSet::new();
897
898    for concept_id in rdm.get_concept_ids() {
899        if exclude_ids.contains(concept_id.as_str()) {
900            continue;
901        }
902        if !all_narrower_ids.contains(concept_id) {
903            if let Some(concept_with_children) =
904                build_concept_tree_from_rdm(concept_id, &all_skos_concepts, rdm, &mut placed)
905            {
906                hierarchy.insert(concept_id.clone(), concept_with_children);
907            }
908        }
909    }
910
911    let top_level_concepts = if hierarchy.is_empty() {
912        all_skos_concepts.clone()
913    } else {
914        hierarchy
915    };
916
917    SkosCollection {
918        id: rdm.id.clone(),
919        uri: None,
920        pref_labels: collection_pref_labels,
921        alt_labels: HashMap::new(),
922        scope_notes: HashMap::new(),
923        node_type: skos_node_type,
924        concepts: top_level_concepts,
925        all_concepts: all_skos_concepts,
926        values: HashMap::new(),
927    }
928}
929
930/// Build a concept tree recursively from RDM narrower relationships.
931///
932/// `placed` tracks concept IDs already claimed by a parent, preventing the
933/// same concept from appearing under multiple parents (diamond hierarchies)
934/// which would produce duplicate `<skos:Concept>` elements in SKOS XML.
935fn build_concept_tree_from_rdm(
936    concept_id: &str,
937    all_concepts: &HashMap<String, SkosConcept>,
938    rdm_collection: &RdmCollection,
939    placed: &mut HashSet<String>,
940) -> Option<SkosConcept> {
941    if !placed.insert(concept_id.to_string()) {
942        return None; // already claimed by another parent
943    }
944
945    let mut concept = all_concepts.get(concept_id)?.clone();
946
947    if let Some(rdm_concept) = rdm_collection.get_concept(concept_id) {
948        if !rdm_concept.narrower.is_empty() {
949            let mut children = Vec::new();
950            for child_id in &rdm_concept.narrower {
951                if let Some(child) =
952                    build_concept_tree_from_rdm(child_id, all_concepts, rdm_collection, placed)
953                {
954                    children.push(child);
955                }
956            }
957            if !children.is_empty() {
958                concept.children = Some(children);
959            }
960        }
961    }
962
963    Some(concept)
964}
965
966// =============================================================================
967// ExternalResolver Implementation
968// =============================================================================
969
970use crate::type_serialization::ExternalResolver;
971
972impl ExternalResolver for RdmCache {
973    fn resolve_concept(
974        &self,
975        collection_id: &str,
976        concept_id: &str,
977        language: &str,
978    ) -> Option<String> {
979        self.lookup_label(collection_id, concept_id, language)
980    }
981}
982
983// =============================================================================
984// ConceptLookup Implementation
985// =============================================================================
986
987use crate::label_resolution::ConceptLookup;
988
989impl ConceptLookup for RdmCache {
990    fn lookup_by_label(&self, collection_id: &str, label: &str) -> Option<String> {
991        self.collections
992            .get(collection_id)
993            .and_then(|c| c.find_by_label(label))
994            .map(|c| c.id.clone())
995    }
996}
997
998#[cfg(test)]
999mod tests {
1000    use super::*;
1001
1002    #[test]
1003    fn test_concept_label_lookup() {
1004        let mut cache = RdmCache::new();
1005
1006        let concepts_json = r#"[
1007            {
1008                "id": "concept-1",
1009                "prefLabel": {"en": "English Label", "de": "German Label"}
1010            },
1011            {
1012                "id": "concept-2",
1013                "prefLabel": {"en": "Second Concept"}
1014            }
1015        ]"#;
1016
1017        cache
1018            .add_collection_from_json("collection-1", concepts_json)
1019            .unwrap();
1020
1021        assert!(cache.has_collection("collection-1"));
1022        assert!(!cache.has_collection("collection-2"));
1023
1024        assert_eq!(
1025            cache.lookup_label("collection-1", "concept-1", "en"),
1026            Some("English Label".to_string())
1027        );
1028        assert_eq!(
1029            cache.lookup_label("collection-1", "concept-1", "de"),
1030            Some("German Label".to_string())
1031        );
1032        // Fallback to en
1033        assert_eq!(
1034            cache.lookup_label("collection-1", "concept-1", "fr"),
1035            Some("English Label".to_string())
1036        );
1037        // Not found
1038        assert_eq!(cache.lookup_label("collection-1", "concept-3", "en"), None);
1039    }
1040
1041    #[test]
1042    fn test_clear_cache() {
1043        let mut cache = RdmCache::new();
1044
1045        cache
1046            .add_collection_from_json("coll-1", r#"[{"id": "c1", "prefLabel": {"en": "C1"}}]"#)
1047            .unwrap();
1048        cache
1049            .add_collection_from_json("coll-2", r#"[{"id": "c2", "prefLabel": {"en": "C2"}}]"#)
1050            .unwrap();
1051
1052        assert_eq!(cache.get_collection_ids().len(), 2);
1053
1054        cache.clear();
1055        assert_eq!(cache.get_collection_ids().len(), 0);
1056    }
1057
1058    #[test]
1059    fn test_hierarchical_concepts() {
1060        let mut collection = RdmCollection::new("coll-1".to_string());
1061
1062        // Create parent concept (no broader)
1063        let mut parent_labels = HashMap::new();
1064        parent_labels.insert(
1065            "en".to_string(),
1066            RdmValue::new("v-parent-en".to_string(), "Parent".to_string()),
1067        );
1068        let parent = RdmConcept {
1069            id: "parent".to_string(),
1070            pref_label: parent_labels,
1071            alt_labels: HashMap::new(),
1072            broader: vec![],
1073            narrower: vec!["child".to_string()],
1074            scope_note: HashMap::new(),
1075        };
1076
1077        // Create child concept (has broader)
1078        let mut child_labels = HashMap::new();
1079        child_labels.insert(
1080            "en".to_string(),
1081            RdmValue::new("v-child-en".to_string(), "Child".to_string()),
1082        );
1083        let child = RdmConcept {
1084            id: "child".to_string(),
1085            pref_label: child_labels,
1086            alt_labels: HashMap::new(),
1087            broader: vec!["parent".to_string()],
1088            narrower: vec![],
1089            scope_note: HashMap::new(),
1090        };
1091
1092        collection.add_concept(parent);
1093        collection.add_concept(child);
1094
1095        // Collection should have 2 concepts
1096        assert_eq!(collection.len(), 2);
1097
1098        // Only parent should be in top_concepts (child has broader)
1099        let top = collection.get_top_concepts();
1100        assert_eq!(top.len(), 1);
1101        assert_eq!(top[0].id, "parent");
1102
1103        // Both concepts should be accessible
1104        assert!(collection.has_concept("parent"));
1105        assert!(collection.has_concept("child"));
1106    }
1107
1108    #[test]
1109    fn test_get_concept_mut() {
1110        let mut collection = RdmCollection::new("coll-1".to_string());
1111
1112        let mut labels = HashMap::new();
1113        labels.insert(
1114            "en".to_string(),
1115            RdmValue::new("v-c1-en".to_string(), "Original".to_string()),
1116        );
1117        let concept = RdmConcept {
1118            id: "c1".to_string(),
1119            pref_label: labels,
1120            alt_labels: HashMap::new(),
1121            broader: vec![],
1122            narrower: vec![],
1123            scope_note: HashMap::new(),
1124        };
1125
1126        collection.add_concept(concept);
1127
1128        // Modify the concept
1129        if let Some(c) = collection.get_concept_mut("c1") {
1130            c.narrower.push("c2".to_string());
1131            c.pref_label.insert(
1132                "de".to_string(),
1133                RdmValue::new("v-c1-de".to_string(), "Geändert".to_string()),
1134            );
1135        }
1136
1137        // Verify changes persisted
1138        let c = collection.get_concept("c1").unwrap();
1139        assert_eq!(c.narrower, vec!["c2".to_string()]);
1140        assert_eq!(
1141            c.pref_label.get("de").map(|v| v.value.as_str()),
1142            Some("Geändert")
1143        );
1144    }
1145
1146    #[test]
1147    fn test_add_child_concept_hierarchy() {
1148        let mut collection = RdmCollection::new("coll-1".to_string());
1149
1150        // Add parent
1151        let mut parent_labels = HashMap::new();
1152        parent_labels.insert(
1153            "en".to_string(),
1154            RdmValue::new("v-animals-en".to_string(), "Animals".to_string()),
1155        );
1156        let parent = RdmConcept {
1157            id: "animals".to_string(),
1158            pref_label: parent_labels,
1159            alt_labels: HashMap::new(),
1160            broader: vec![],
1161            narrower: vec![],
1162            scope_note: HashMap::new(),
1163        };
1164        collection.add_concept(parent);
1165
1166        // Update parent's narrower list
1167        if let Some(p) = collection.get_concept_mut("animals") {
1168            p.narrower.push("mammals".to_string());
1169        }
1170
1171        // Add child with broader pointing to parent
1172        let mut child_labels = HashMap::new();
1173        child_labels.insert(
1174            "en".to_string(),
1175            RdmValue::new("v-mammals-en".to_string(), "Mammals".to_string()),
1176        );
1177        let child = RdmConcept {
1178            id: "mammals".to_string(),
1179            pref_label: child_labels,
1180            alt_labels: HashMap::new(),
1181            broader: vec!["animals".to_string()],
1182            narrower: vec![],
1183            scope_note: HashMap::new(),
1184        };
1185        collection.add_concept(child);
1186
1187        // Verify hierarchy
1188        let top = collection.get_top_concepts();
1189        assert_eq!(top.len(), 1);
1190        assert_eq!(top[0].id, "animals");
1191        assert_eq!(top[0].narrower, vec!["mammals".to_string()]);
1192
1193        let child = collection.get_concept("mammals").unwrap();
1194        assert_eq!(child.broader, vec!["animals".to_string()]);
1195    }
1196
1197    #[test]
1198    fn test_value_id_lookup() {
1199        let mut cache = RdmCache::new();
1200
1201        // JSON with explicit value IDs
1202        let concepts_json = r#"[
1203            {
1204                "id": "concept-1",
1205                "prefLabels": {
1206                    "en": { "id": "value-1-en", "value": "English Label" },
1207                    "de": { "id": "value-1-de", "value": "German Label" }
1208                }
1209            },
1210            {
1211                "id": "concept-2",
1212                "prefLabels": {
1213                    "en": { "id": "value-2-en", "value": "Second Concept" }
1214                }
1215            }
1216        ]"#;
1217
1218        cache
1219            .add_collection_from_json("collection-1", concepts_json)
1220            .unwrap();
1221
1222        // Look up by value ID
1223        let value = cache.lookup_value("collection-1", "value-1-en").unwrap();
1224        assert_eq!(value.id, "value-1-en");
1225        assert_eq!(value.value, "English Label");
1226        assert_eq!(value.concept_id, "concept-1");
1227        assert_eq!(value.language, "en");
1228
1229        // Get concept ID from value ID
1230        assert_eq!(
1231            cache.get_concept_id_for_value("collection-1", "value-1-de"),
1232            Some("concept-1")
1233        );
1234
1235        // Non-existent value ID
1236        assert!(cache.lookup_value("collection-1", "nonexistent").is_none());
1237        assert!(cache
1238            .get_concept_id_for_value("collection-1", "nonexistent")
1239            .is_none());
1240
1241        // Validate existence
1242        assert!(cache.validate_value("collection-1", "value-2-en"));
1243        assert!(!cache.validate_value("collection-1", "nonexistent"));
1244    }
1245
1246    #[test]
1247    fn test_simple_preflabel_format_generates_ids() {
1248        let mut cache = RdmCache::new();
1249
1250        // JSON with simple string format (no value IDs)
1251        let concepts_json = r#"[
1252            {
1253                "id": "concept-1",
1254                "prefLabel": {"en": "Label One", "de": "Etikett Eins"}
1255            }
1256        ]"#;
1257
1258        cache
1259            .add_collection_from_json("collection-1", concepts_json)
1260            .unwrap();
1261
1262        // Value IDs should be generated deterministically
1263        let collection = cache.get_collection("collection-1").unwrap();
1264        let concept = collection.get_concept("concept-1").unwrap();
1265
1266        // Check that value IDs were generated (not __pending__)
1267        let en_value = concept.pref_label.get("en").unwrap();
1268        assert_ne!(en_value.id, "__pending__");
1269        assert_eq!(en_value.value, "Label One");
1270
1271        // Should be able to look up by the generated value ID
1272        let looked_up = collection.get_value_by_id(&en_value.id).unwrap();
1273        assert_eq!(looked_up.value, "Label One");
1274        assert_eq!(looked_up.concept_id, "concept-1");
1275    }
1276
1277    #[test]
1278    fn test_get_parent_id() {
1279        let mut cache = RdmCache::new();
1280
1281        let concepts_json = r#"[
1282            {
1283                "id": "parent-concept",
1284                "prefLabel": {"en": "Parent"}
1285            },
1286            {
1287                "id": "child-concept",
1288                "prefLabel": {"en": "Child"},
1289                "broader": ["parent-concept"]
1290            }
1291        ]"#;
1292
1293        cache
1294            .add_collection_from_json("coll-1", concepts_json)
1295            .unwrap();
1296
1297        // Child should have parent
1298        assert_eq!(
1299            cache.get_parent_id("coll-1", "child-concept"),
1300            Some("parent-concept".to_string())
1301        );
1302
1303        // Parent has no parent (top-level)
1304        assert_eq!(cache.get_parent_id("coll-1", "parent-concept"), None);
1305
1306        // Non-existent concept
1307        assert_eq!(cache.get_parent_id("coll-1", "nonexistent"), None);
1308    }
1309}