Skip to main content

alizarin_core/
label_resolution.rs

//! Label Resolution Module
//!
//! Provides core label-to-UUID resolution logic shared between WASM (JS) and PyO3 (Python).
//!
//! This module handles:
//! - Scanning JSON trees to identify which collections are needed
//! - Building alias -> collection ID mappings from graph definitions
//! - Resolving label strings to UUIDs using collection lookups
//! - UUID validation and passthrough
10
11use serde_json::Value;
12use std::collections::{HashMap, HashSet};
13use uuid::Uuid;
14
/// Datatypes that support label resolution by default.
///
/// Extension types (e.g. "reference") should register themselves
/// via the extension handler mechanism rather than being listed here.
pub const DEFAULT_RESOLVABLE_DATATYPES: &[&str] = &[
    "concept",
    "concept-list",
    "domain-value",
    "domain-value-list",
];

/// Node config keys that hold collection IDs, listed in order of preference:
/// when a node's config is inspected, the first key in this list that holds a
/// string value wins.
pub const DEFAULT_CONFIG_KEYS: &[&str] = &["rdmCollection", "controlledList"];
27
/// Failure reported by the label resolution functions.
#[derive(Debug, Clone)]
pub struct LabelResolutionError {
    /// Human-readable summary; this is the text produced by `Display`.
    pub message: String,
    /// Individual problems behind the failure (may be empty).
    pub errors: Vec<String>,
}

impl std::fmt::Display for LabelResolutionError {
    /// Writes only `message`; the per-item `errors` list is not rendered.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(&self.message)
    }
}

// No underlying source error is exposed, so the default trait methods suffice.
impl std::error::Error for LabelResolutionError {}
42
/// Trait for looking up concepts by label in a collection.
///
/// This trait abstracts over the actual collection storage (RdmCache in Python,
/// StaticCollection in JS) so the resolution logic can be shared.
pub trait ConceptLookup {
    /// Look up a concept ID by its label in a specific collection.
    ///
    /// * `collection_id` - ID of the collection to search.
    /// * `label` - label text to look up; how labels are matched (for example
    ///   case sensitivity) is left to the implementation.
    ///
    /// Returns None if not found or ambiguous.
    fn lookup_by_label(&self, collection_id: &str, label: &str) -> Option<String>;
}
52
53/// Check if a string is a valid UUID
54#[inline]
55pub fn is_valid_uuid(s: &str) -> bool {
56    Uuid::parse_str(s).is_ok()
57}
58
59/// Configuration for label resolution
60#[derive(Clone, Debug)]
61pub struct LabelResolutionConfig {
62    /// Datatypes that should have their labels resolved
63    pub resolvable_datatypes: Vec<String>,
64    /// Config keys to check for collection IDs
65    pub config_keys: Vec<String>,
66    /// If true, return errors for unresolved labels. If false, pass through.
67    pub strict: bool,
68}
69
70impl Default for LabelResolutionConfig {
71    fn default() -> Self {
72        Self {
73            resolvable_datatypes: DEFAULT_RESOLVABLE_DATATYPES
74                .iter()
75                .map(|s| s.to_string())
76                .collect(),
77            config_keys: DEFAULT_CONFIG_KEYS.iter().map(|s| s.to_string()).collect(),
78            strict: false,
79        }
80    }
81}
82
83impl LabelResolutionConfig {
84    /// Create a new config with custom datatypes and keys
85    pub fn new(resolvable_datatypes: Vec<String>, config_keys: Vec<String>, strict: bool) -> Self {
86        Self {
87            resolvable_datatypes,
88            config_keys,
89            strict,
90        }
91    }
92
93    /// Add additional resolvable datatypes
94    pub fn with_additional_datatypes(mut self, datatypes: &[&str]) -> Self {
95        for dt in datatypes {
96            if !self.resolvable_datatypes.contains(&dt.to_string()) {
97                self.resolvable_datatypes.push(dt.to_string());
98            }
99        }
100        self
101    }
102
103    /// Add additional config keys
104    pub fn with_additional_config_keys(mut self, keys: &[&str]) -> Self {
105        for key in keys {
106            if !self.config_keys.contains(&key.to_string()) {
107                self.config_keys.push(key.to_string());
108            }
109        }
110        self
111    }
112
113    /// Set strict mode
114    pub fn with_strict(mut self, strict: bool) -> Self {
115        self.strict = strict;
116        self
117    }
118}
119
120/// Build a mapping from node alias to collection ID based on graph definition.
121///
122/// Returns a HashMap where keys are node aliases and values are collection IDs
123/// for nodes with resolvable datatypes.
124pub fn build_alias_to_collection_map(
125    graph: &Value,
126    config: &LabelResolutionConfig,
127) -> HashMap<String, String> {
128    let mut alias_to_collection: HashMap<String, String> = HashMap::new();
129
130    // Handle wrapped graph format: {graph: [graphDef]} or direct graphDef
131    let graph_def = if let Some(graphs) = graph.get("graph").and_then(|g| g.as_array()) {
132        graphs.first().cloned().unwrap_or(graph.clone())
133    } else {
134        graph.clone()
135    };
136
137    // Get nodes array
138    let nodes = match graph_def.get("nodes").and_then(|n| n.as_array()) {
139        Some(n) => n,
140        None => return alias_to_collection,
141    };
142
143    let resolvable_set: HashSet<&str> = config
144        .resolvable_datatypes
145        .iter()
146        .map(|s| s.as_str())
147        .collect();
148
149    for node in nodes {
150        let alias = match node.get("alias").and_then(|a| a.as_str()) {
151            Some(a) => a,
152            None => continue,
153        };
154
155        let datatype = match node.get("datatype").and_then(|d| d.as_str()) {
156            Some(d) => d,
157            None => continue,
158        };
159
160        if !resolvable_set.contains(datatype) {
161            continue;
162        }
163
164        let node_config = match node.get("config") {
165            Some(c) => c,
166            None => continue,
167        };
168
169        // Check config keys in order of preference
170        for key in &config.config_keys {
171            if let Some(collection_id) = node_config.get(key).and_then(|v| v.as_str()) {
172                alias_to_collection.insert(alias.to_string(), collection_id.to_string());
173                break;
174            }
175        }
176    }
177
178    alias_to_collection
179}
180
181/// Scan a JSON tree to find which collections are needed for resolution.
182///
183/// Returns a set of collection IDs that appear in the tree for resolvable fields.
184pub fn find_needed_collections(
185    tree: &Value,
186    alias_to_collection: &HashMap<String, String>,
187) -> HashSet<String> {
188    let mut needed: HashSet<String> = HashSet::new();
189
190    fn scan(
191        value: &Value,
192        alias: Option<&str>,
193        alias_map: &HashMap<String, String>,
194        needed: &mut HashSet<String>,
195    ) {
196        match value {
197            Value::Object(obj) => {
198                // Check for _value wrapper
199                if let Some(inner) = obj.get("_value") {
200                    scan(inner, alias, alias_map, needed);
201                    return;
202                }
203                // Process each field
204                for (key, v) in obj {
205                    scan(v, Some(key.as_str()), alias_map, needed);
206                }
207            }
208            Value::Array(arr) => {
209                for item in arr {
210                    scan(item, alias, alias_map, needed);
211                }
212            }
213            Value::String(_) => {
214                if let Some(a) = alias {
215                    if let Some(collection_id) = alias_map.get(a) {
216                        needed.insert(collection_id.clone());
217                    }
218                }
219            }
220            _ => {}
221        }
222    }
223
224    scan(tree, None, alias_to_collection, &mut needed);
225    needed
226}
227
228/// Resolve labels to UUIDs in a JSON tree.
229///
230/// This is the core resolution function. It takes:
231/// - The tree to resolve
232/// - The alias -> collection mapping
233/// - A lookup implementation for finding concepts
234/// - Configuration options
235///
236/// Returns the resolved tree and any errors encountered.
237pub fn resolve_labels<L: ConceptLookup>(
238    tree: Value,
239    alias_to_collection: &HashMap<String, String>,
240    lookup: &L,
241    strict: bool,
242) -> Result<Value, LabelResolutionError> {
243    let mut errors: Vec<String> = Vec::new();
244
245    fn resolve(
246        value: Value,
247        alias: Option<&str>,
248        alias_map: &HashMap<String, String>,
249        lookup: &impl ConceptLookup,
250        errors: &mut Vec<String>,
251        strict: bool,
252    ) -> Value {
253        match value {
254            Value::Object(mut obj) => {
255                // Check for _value wrapper
256                if obj.contains_key("_value") {
257                    if let Some(inner) = obj.remove("_value") {
258                        let resolved = resolve(inner, alias, alias_map, lookup, errors, strict);
259                        obj.insert("_value".to_string(), resolved);
260                    }
261                    return Value::Object(obj);
262                }
263                // Process each field
264                let resolved_obj: serde_json::Map<String, Value> = obj
265                    .into_iter()
266                    .map(|(key, v)| {
267                        let resolved =
268                            resolve(v, Some(key.as_str()), alias_map, lookup, errors, strict);
269                        (key, resolved)
270                    })
271                    .collect();
272                Value::Object(resolved_obj)
273            }
274            Value::Array(arr) => {
275                let resolved_arr: Vec<Value> = arr
276                    .into_iter()
277                    .map(|item| resolve(item, alias, alias_map, lookup, errors, strict))
278                    .collect();
279                Value::Array(resolved_arr)
280            }
281            Value::String(s) => {
282                if let Some(a) = alias {
283                    if let Some(collection_id) = alias_map.get(a) {
284                        // Skip if already a UUID
285                        if is_valid_uuid(&s) {
286                            return Value::String(s);
287                        }
288
289                        // Try to resolve the label
290                        if let Some(concept_id) = lookup.lookup_by_label(collection_id, &s) {
291                            return Value::String(concept_id);
292                        } else if strict {
293                            errors.push(format!(
294                                "Label '{}' not found in collection '{}' for field '{}'",
295                                s, collection_id, a
296                            ));
297                        }
298                    }
299                }
300                Value::String(s)
301            }
302            other => other,
303        }
304    }
305
306    let resolved = resolve(tree, None, alias_to_collection, lookup, &mut errors, strict);
307
308    if !errors.is_empty() {
309        return Err(LabelResolutionError {
310            message: format!("Failed to resolve labels:\n  {}", errors.join("\n  ")),
311            errors,
312        });
313    }
314
315    Ok(resolved)
316}
317
318/// High-level function to resolve labels in a JSON tree.
319///
320/// This combines all steps:
321/// 1. Parse graph and build alias mapping
322/// 2. Scan tree for needed collections (returned for lazy loading)
323/// 3. Resolve labels using the provided lookup
324///
325/// Returns (resolved_tree, needed_collection_ids)
326pub fn resolve_labels_full<L: ConceptLookup>(
327    tree_json: &str,
328    graph_json: &str,
329    lookup: &L,
330    config: &LabelResolutionConfig,
331) -> Result<(String, HashSet<String>), LabelResolutionError> {
332    // Parse inputs
333    let tree: Value = serde_json::from_str(tree_json).map_err(|e| LabelResolutionError {
334        message: format!("Failed to parse tree JSON: {}", e),
335        errors: vec![],
336    })?;
337
338    let graph: Value = serde_json::from_str(graph_json).map_err(|e| LabelResolutionError {
339        message: format!("Failed to parse graph JSON: {}", e),
340        errors: vec![],
341    })?;
342
343    // Build alias -> collection mapping
344    let alias_to_collection = build_alias_to_collection_map(&graph, config);
345
346    if alias_to_collection.is_empty() {
347        // No resolvable nodes, return tree unchanged
348        return Ok((tree_json.to_string(), HashSet::new()));
349    }
350
351    // Find which collections are needed
352    let needed_collections = find_needed_collections(&tree, &alias_to_collection);
353
354    // Resolve labels
355    let resolved = resolve_labels(tree, &alias_to_collection, lookup, config.strict)?;
356
357    // Serialize result
358    let resolved_json = serde_json::to_string(&resolved).map_err(|e| LabelResolutionError {
359        message: format!("Failed to serialize resolved tree: {}", e),
360        errors: vec![],
361    })?;
362
363    Ok((resolved_json, needed_collections))
364}
365
#[cfg(test)]
mod tests {
    use super::*;

    /// In-memory `ConceptLookup`: collection ID -> (lower-cased label -> concept ID).
    struct MockLookup {
        collections: HashMap<String, HashMap<String, String>>,
    }

    impl MockLookup {
        /// Lookup that knows no collections at all.
        fn empty() -> Self {
            Self {
                collections: HashMap::new(),
            }
        }

        /// Lookup holding a single collection with the given (label, id) pairs.
        fn with_collection(collection_id: &str, concepts: &[(&str, &str)]) -> Self {
            let concepts: HashMap<String, String> = concepts
                .iter()
                .map(|&(label, id)| (label.to_string(), id.to_string()))
                .collect();
            let mut collections = HashMap::new();
            collections.insert(collection_id.to_string(), concepts);
            Self { collections }
        }
    }

    impl ConceptLookup for MockLookup {
        fn lookup_by_label(&self, collection_id: &str, label: &str) -> Option<String> {
            let concepts = self.collections.get(collection_id)?;
            // Labels are stored lower-cased, so matching is case-insensitive.
            concepts.get(&label.to_lowercase()).cloned()
        }
    }

    /// Build an alias -> collection map from (alias, collection) pairs.
    fn alias_map(pairs: &[(&str, &str)]) -> HashMap<String, String> {
        pairs
            .iter()
            .map(|&(alias, collection)| (alias.to_string(), collection.to_string()))
            .collect()
    }

    #[test]
    fn test_is_valid_uuid() {
        for candidate in [
            "f8dbf847-aa2b-5a56-bf9e-b4648e8bda8b",
            "F8DBF847-AA2B-5A56-BF9E-B4648E8BDA8B",
        ]
        .iter()
        {
            assert!(is_valid_uuid(candidate));
        }
        for candidate in ["not-a-uuid", "Category A"].iter() {
            assert!(!is_valid_uuid(candidate));
        }
    }

    #[test]
    fn test_build_alias_to_collection_map() {
        let graph = serde_json::json!({
            "nodes": [
                {
                    "alias": "category",
                    "datatype": "concept",
                    "config": {"rdmCollection": "collection-1"}
                },
                {
                    "alias": "tags",
                    "datatype": "concept-list",
                    "config": {"rdmCollection": "collection-2"}
                },
                {
                    "alias": "status",
                    "datatype": "reference",
                    "config": {"controlledList": "collection-3"}
                },
                {
                    "alias": "name",
                    "datatype": "string",
                    "config": {}
                }
            ]
        });

        // Default config covers only the built-in datatypes
        // (concept, concept-list, domain-value, domain-value-list).
        let map = build_alias_to_collection_map(&graph, &LabelResolutionConfig::default());

        assert_eq!(
            map.get("category").map(String::as_str),
            Some("collection-1")
        );
        assert_eq!(map.get("tags").map(String::as_str), Some("collection-2"));
        // "reference" is an extension type, so it is absent from the default set.
        assert!(!map.contains_key("status"));
        assert!(!map.contains_key("name"));
    }

    #[test]
    fn test_build_alias_to_collection_map_with_additional_datatypes() {
        let graph = serde_json::json!({
            "nodes": [
                {
                    "alias": "category",
                    "datatype": "concept",
                    "config": {"rdmCollection": "collection-1"}
                },
                {
                    "alias": "status",
                    "datatype": "reference",
                    "config": {"controlledList": "collection-2"}
                }
            ]
        });

        // Extensions opt their datatypes in through with_additional_datatypes.
        let config = LabelResolutionConfig::default().with_additional_datatypes(&["reference"]);
        let map = build_alias_to_collection_map(&graph, &config);

        assert_eq!(
            map.get("category").map(String::as_str),
            Some("collection-1")
        );
        assert_eq!(map.get("status").map(String::as_str), Some("collection-2"));
    }

    #[test]
    fn test_find_needed_collections() {
        let tree = serde_json::json!({
            "category": ["Cat A", "Cat B"],
            "name": ["John"],
            "status": ["Active"]
        });
        let aliases = alias_map(&[("category", "coll-1"), ("status", "coll-2")]);

        let needed = find_needed_collections(&tree, &aliases);

        let expected: HashSet<String> = ["coll-1", "coll-2"]
            .iter()
            .map(|id| id.to_string())
            .collect();
        assert_eq!(needed, expected);
    }

    #[test]
    fn test_resolve_labels() {
        let tree = serde_json::json!({
            "category": ["Category A", "Category B"],
            "name": ["John"]
        });
        let aliases = alias_map(&[("category", "test-collection")]);
        let lookup = MockLookup::with_collection(
            "test-collection",
            &[("category a", "uuid-a"), ("category b", "uuid-b")],
        );

        let resolved = resolve_labels(tree, &aliases, &lookup, false).unwrap();

        assert_eq!(resolved["category"][0], "uuid-a");
        assert_eq!(resolved["category"][1], "uuid-b");
        // Unmapped aliases are left untouched.
        assert_eq!(resolved["name"][0], "John");
    }

    #[test]
    fn test_resolve_labels_uuid_passthrough() {
        let uuid = "f8dbf847-aa2b-5a56-bf9e-b4648e8bda8b";
        let tree = serde_json::json!({ "category": [uuid] });
        let aliases = alias_map(&[("category", "test-collection")]);

        let resolved = resolve_labels(tree, &aliases, &MockLookup::empty(), false).unwrap();

        assert_eq!(resolved["category"][0], uuid);
    }

    #[test]
    fn test_resolve_labels_strict_mode() {
        let tree = serde_json::json!({
            "category": ["Unknown Label"]
        });
        let aliases = alias_map(&[("category", "test-collection")]);

        let err = resolve_labels(tree, &aliases, &MockLookup::empty(), true)
            .expect_err("strict mode must reject an unknown label");

        assert!(err.message.contains("Unknown Label"));
    }

    #[test]
    fn test_resolve_labels_value_wrapper() {
        let tree = serde_json::json!({
            "category": [{"_value": "Category A"}]
        });
        let aliases = alias_map(&[("category", "test-collection")]);
        let lookup = MockLookup::with_collection("test-collection", &[("category a", "uuid-a")]);

        let resolved = resolve_labels(tree, &aliases, &lookup, false).unwrap();

        assert_eq!(resolved["category"][0]["_value"], "uuid-a");
    }
}