1use serde_json::Value;
12use std::collections::{HashMap, HashSet};
13use uuid::Uuid;
14
/// Node datatypes that [`LabelResolutionConfig::default`] marks as resolvable.
///
/// [`build_alias_to_collection_map`] skips every graph node whose `datatype`
/// string is not in the configured list.
pub const DEFAULT_RESOLVABLE_DATATYPES: &[&str] = &[
    "concept",
    "concept-list",
    "domain-value",
    "domain-value-list",
];
24
/// Keys that [`LabelResolutionConfig::default`] tries, in order, inside a
/// node's `config` object to find the id of the collection the node draws from.
pub const DEFAULT_CONFIG_KEYS: &[&str] = &["rdmCollection", "controlledList"];
27
/// Error returned when JSON parsing, label resolution, or serialization fails.
///
/// `message` is the human-readable summary and is exactly what the
/// [`std::fmt::Display`] implementation prints. `errors` holds the individual
/// per-label failures; it is empty for parse and serialization failures.
#[derive(Clone, Debug)]
pub struct LabelResolutionError {
    pub message: String,
    pub errors: Vec<String>,
}

impl std::fmt::Display for LabelResolutionError {
    /// Writes `message` verbatim; width and padding flags are not applied.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(&self.message)
    }
}

impl std::error::Error for LabelResolutionError {}
42
/// Source of concept ids, queried by label within a single collection.
///
/// [`resolve_labels`] calls this for every string found under a resolvable
/// alias that is not already a UUID.
pub trait ConceptLookup {
    /// Returns the id of the concept in `collection_id` whose label is `label`,
    /// or `None` when nothing matches. How labels are compared (for example
    /// case sensitivity) is up to the implementation.
    fn lookup_by_label(&self, collection_id: &str, label: &str) -> Option<String>;
}
52
53#[inline]
55pub fn is_valid_uuid(s: &str) -> bool {
56 Uuid::parse_str(s).is_ok()
57}
58
59#[derive(Clone, Debug)]
61pub struct LabelResolutionConfig {
62 pub resolvable_datatypes: Vec<String>,
64 pub config_keys: Vec<String>,
66 pub strict: bool,
68}
69
70impl Default for LabelResolutionConfig {
71 fn default() -> Self {
72 Self {
73 resolvable_datatypes: DEFAULT_RESOLVABLE_DATATYPES
74 .iter()
75 .map(|s| s.to_string())
76 .collect(),
77 config_keys: DEFAULT_CONFIG_KEYS.iter().map(|s| s.to_string()).collect(),
78 strict: false,
79 }
80 }
81}
82
83impl LabelResolutionConfig {
84 pub fn new(resolvable_datatypes: Vec<String>, config_keys: Vec<String>, strict: bool) -> Self {
86 Self {
87 resolvable_datatypes,
88 config_keys,
89 strict,
90 }
91 }
92
93 pub fn with_additional_datatypes(mut self, datatypes: &[&str]) -> Self {
95 for dt in datatypes {
96 if !self.resolvable_datatypes.contains(&dt.to_string()) {
97 self.resolvable_datatypes.push(dt.to_string());
98 }
99 }
100 self
101 }
102
103 pub fn with_additional_config_keys(mut self, keys: &[&str]) -> Self {
105 for key in keys {
106 if !self.config_keys.contains(&key.to_string()) {
107 self.config_keys.push(key.to_string());
108 }
109 }
110 self
111 }
112
113 pub fn with_strict(mut self, strict: bool) -> Self {
115 self.strict = strict;
116 self
117 }
118}
119
120pub fn build_alias_to_collection_map(
125 graph: &Value,
126 config: &LabelResolutionConfig,
127) -> HashMap<String, String> {
128 let mut alias_to_collection: HashMap<String, String> = HashMap::new();
129
130 let graph_def = if let Some(graphs) = graph.get("graph").and_then(|g| g.as_array()) {
132 graphs.first().cloned().unwrap_or(graph.clone())
133 } else {
134 graph.clone()
135 };
136
137 let nodes = match graph_def.get("nodes").and_then(|n| n.as_array()) {
139 Some(n) => n,
140 None => return alias_to_collection,
141 };
142
143 let resolvable_set: HashSet<&str> = config
144 .resolvable_datatypes
145 .iter()
146 .map(|s| s.as_str())
147 .collect();
148
149 for node in nodes {
150 let alias = match node.get("alias").and_then(|a| a.as_str()) {
151 Some(a) => a,
152 None => continue,
153 };
154
155 let datatype = match node.get("datatype").and_then(|d| d.as_str()) {
156 Some(d) => d,
157 None => continue,
158 };
159
160 if !resolvable_set.contains(datatype) {
161 continue;
162 }
163
164 let node_config = match node.get("config") {
165 Some(c) => c,
166 None => continue,
167 };
168
169 for key in &config.config_keys {
171 if let Some(collection_id) = node_config.get(key).and_then(|v| v.as_str()) {
172 alias_to_collection.insert(alias.to_string(), collection_id.to_string());
173 break;
174 }
175 }
176 }
177
178 alias_to_collection
179}
180
181pub fn find_needed_collections(
185 tree: &Value,
186 alias_to_collection: &HashMap<String, String>,
187) -> HashSet<String> {
188 let mut needed: HashSet<String> = HashSet::new();
189
190 fn scan(
191 value: &Value,
192 alias: Option<&str>,
193 alias_map: &HashMap<String, String>,
194 needed: &mut HashSet<String>,
195 ) {
196 match value {
197 Value::Object(obj) => {
198 if let Some(inner) = obj.get("_value") {
200 scan(inner, alias, alias_map, needed);
201 return;
202 }
203 for (key, v) in obj {
205 scan(v, Some(key.as_str()), alias_map, needed);
206 }
207 }
208 Value::Array(arr) => {
209 for item in arr {
210 scan(item, alias, alias_map, needed);
211 }
212 }
213 Value::String(_) => {
214 if let Some(a) = alias {
215 if let Some(collection_id) = alias_map.get(a) {
216 needed.insert(collection_id.clone());
217 }
218 }
219 }
220 _ => {}
221 }
222 }
223
224 scan(tree, None, alias_to_collection, &mut needed);
225 needed
226}
227
228pub fn resolve_labels<L: ConceptLookup>(
238 tree: Value,
239 alias_to_collection: &HashMap<String, String>,
240 lookup: &L,
241 strict: bool,
242) -> Result<Value, LabelResolutionError> {
243 let mut errors: Vec<String> = Vec::new();
244
245 fn resolve(
246 value: Value,
247 alias: Option<&str>,
248 alias_map: &HashMap<String, String>,
249 lookup: &impl ConceptLookup,
250 errors: &mut Vec<String>,
251 strict: bool,
252 ) -> Value {
253 match value {
254 Value::Object(mut obj) => {
255 if obj.contains_key("_value") {
257 if let Some(inner) = obj.remove("_value") {
258 let resolved = resolve(inner, alias, alias_map, lookup, errors, strict);
259 obj.insert("_value".to_string(), resolved);
260 }
261 return Value::Object(obj);
262 }
263 let resolved_obj: serde_json::Map<String, Value> = obj
265 .into_iter()
266 .map(|(key, v)| {
267 let resolved =
268 resolve(v, Some(key.as_str()), alias_map, lookup, errors, strict);
269 (key, resolved)
270 })
271 .collect();
272 Value::Object(resolved_obj)
273 }
274 Value::Array(arr) => {
275 let resolved_arr: Vec<Value> = arr
276 .into_iter()
277 .map(|item| resolve(item, alias, alias_map, lookup, errors, strict))
278 .collect();
279 Value::Array(resolved_arr)
280 }
281 Value::String(s) => {
282 if let Some(a) = alias {
283 if let Some(collection_id) = alias_map.get(a) {
284 if is_valid_uuid(&s) {
286 return Value::String(s);
287 }
288
289 if let Some(concept_id) = lookup.lookup_by_label(collection_id, &s) {
291 return Value::String(concept_id);
292 } else if strict {
293 errors.push(format!(
294 "Label '{}' not found in collection '{}' for field '{}'",
295 s, collection_id, a
296 ));
297 }
298 }
299 }
300 Value::String(s)
301 }
302 other => other,
303 }
304 }
305
306 let resolved = resolve(tree, None, alias_to_collection, lookup, &mut errors, strict);
307
308 if !errors.is_empty() {
309 return Err(LabelResolutionError {
310 message: format!("Failed to resolve labels:\n {}", errors.join("\n ")),
311 errors,
312 });
313 }
314
315 Ok(resolved)
316}
317
318pub fn resolve_labels_full<L: ConceptLookup>(
327 tree_json: &str,
328 graph_json: &str,
329 lookup: &L,
330 config: &LabelResolutionConfig,
331) -> Result<(String, HashSet<String>), LabelResolutionError> {
332 let tree: Value = serde_json::from_str(tree_json).map_err(|e| LabelResolutionError {
334 message: format!("Failed to parse tree JSON: {}", e),
335 errors: vec![],
336 })?;
337
338 let graph: Value = serde_json::from_str(graph_json).map_err(|e| LabelResolutionError {
339 message: format!("Failed to parse graph JSON: {}", e),
340 errors: vec![],
341 })?;
342
343 let alias_to_collection = build_alias_to_collection_map(&graph, config);
345
346 if alias_to_collection.is_empty() {
347 return Ok((tree_json.to_string(), HashSet::new()));
349 }
350
351 let needed_collections = find_needed_collections(&tree, &alias_to_collection);
353
354 let resolved = resolve_labels(tree, &alias_to_collection, lookup, config.strict)?;
356
357 let resolved_json = serde_json::to_string(&resolved).map_err(|e| LabelResolutionError {
359 message: format!("Failed to serialize resolved tree: {}", e),
360 errors: vec![],
361 })?;
362
363 Ok((resolved_json, needed_collections))
364}
365
#[cfg(test)]
mod tests {
    use super::*;

    /// In-memory [`ConceptLookup`]: collection id -> lower-cased label -> id.
    struct MockLookup {
        collections: HashMap<String, HashMap<String, String>>,
    }

    impl MockLookup {
        /// Lookup that knows no collections at all.
        fn empty() -> Self {
            Self {
                collections: HashMap::new(),
            }
        }

        /// Lookup with one collection holding the given `(label, id)` pairs.
        fn with_collection(collection_id: &str, concepts: &[(&str, &str)]) -> Self {
            let entries: HashMap<String, String> = concepts
                .iter()
                .map(|&(label, id)| (label.to_string(), id.to_string()))
                .collect();

            let mut collections = HashMap::new();
            collections.insert(collection_id.to_string(), entries);
            Self { collections }
        }
    }

    impl ConceptLookup for MockLookup {
        fn lookup_by_label(&self, collection_id: &str, label: &str) -> Option<String> {
            let concepts = self.collections.get(collection_id)?;
            concepts.get(&label.to_lowercase()).cloned()
        }
    }

    /// Builds an alias -> collection map from `(alias, collection)` pairs.
    fn alias_map_of(pairs: &[(&str, &str)]) -> HashMap<String, String> {
        pairs
            .iter()
            .map(|&(alias, collection)| (alias.to_string(), collection.to_string()))
            .collect()
    }

    #[test]
    fn test_is_valid_uuid() {
        let accepted = [
            "f8dbf847-aa2b-5a56-bf9e-b4648e8bda8b",
            "F8DBF847-AA2B-5A56-BF9E-B4648E8BDA8B",
        ];
        for candidate in &accepted {
            assert!(is_valid_uuid(candidate));
        }

        let rejected = ["not-a-uuid", "Category A"];
        for candidate in &rejected {
            assert!(!is_valid_uuid(candidate));
        }
    }

    #[test]
    fn test_build_alias_to_collection_map() {
        let graph = serde_json::json!({
            "nodes": [
                {
                    "alias": "category",
                    "datatype": "concept",
                    "config": {"rdmCollection": "collection-1"}
                },
                {
                    "alias": "tags",
                    "datatype": "concept-list",
                    "config": {"rdmCollection": "collection-2"}
                },
                {
                    "alias": "status",
                    "datatype": "reference",
                    "config": {"controlledList": "collection-3"}
                },
                {
                    "alias": "name",
                    "datatype": "string",
                    "config": {}
                }
            ]
        });

        let map = build_alias_to_collection_map(&graph, &LabelResolutionConfig::default());

        assert_eq!(map.get("category"), Some(&"collection-1".to_string()));
        assert_eq!(map.get("tags"), Some(&"collection-2".to_string()));
        // "reference" is not a default resolvable datatype.
        assert_eq!(map.get("status"), None);
        assert_eq!(map.get("name"), None);
    }

    #[test]
    fn test_build_alias_to_collection_map_with_additional_datatypes() {
        let graph = serde_json::json!({
            "nodes": [
                {
                    "alias": "category",
                    "datatype": "concept",
                    "config": {"rdmCollection": "collection-1"}
                },
                {
                    "alias": "status",
                    "datatype": "reference",
                    "config": {"controlledList": "collection-2"}
                }
            ]
        });

        let config = LabelResolutionConfig::default().with_additional_datatypes(&["reference"]);
        let map = build_alias_to_collection_map(&graph, &config);

        assert_eq!(map.get("category"), Some(&"collection-1".to_string()));
        assert_eq!(map.get("status"), Some(&"collection-2".to_string()));
    }

    #[test]
    fn test_find_needed_collections() {
        let tree = serde_json::json!({
            "category": ["Cat A", "Cat B"],
            "name": ["John"],
            "status": ["Active"]
        });
        let alias_map = alias_map_of(&[("category", "coll-1"), ("status", "coll-2")]);

        let needed = find_needed_collections(&tree, &alias_map);

        assert!(needed.contains("coll-1"));
        assert!(needed.contains("coll-2"));
        assert_eq!(needed.len(), 2);
    }

    #[test]
    fn test_resolve_labels() {
        let tree = serde_json::json!({
            "category": ["Category A", "Category B"],
            "name": ["John"]
        });
        let alias_map = alias_map_of(&[("category", "test-collection")]);
        let lookup = MockLookup::with_collection(
            "test-collection",
            &[("category a", "uuid-a"), ("category b", "uuid-b")],
        );

        let resolved = resolve_labels(tree, &alias_map, &lookup, false).unwrap();

        assert_eq!(resolved["category"][0], "uuid-a");
        assert_eq!(resolved["category"][1], "uuid-b");
        assert_eq!(resolved["name"][0], "John");
    }

    #[test]
    fn test_resolve_labels_uuid_passthrough() {
        let tree = serde_json::json!({
            "category": ["f8dbf847-aa2b-5a56-bf9e-b4648e8bda8b"]
        });
        let alias_map = alias_map_of(&[("category", "test-collection")]);
        let lookup = MockLookup::empty();

        let resolved = resolve_labels(tree, &alias_map, &lookup, false).unwrap();

        assert_eq!(
            resolved["category"][0],
            "f8dbf847-aa2b-5a56-bf9e-b4648e8bda8b"
        );
    }

    #[test]
    fn test_resolve_labels_strict_mode() {
        let tree = serde_json::json!({
            "category": ["Unknown Label"]
        });
        let alias_map = alias_map_of(&[("category", "test-collection")]);
        let lookup = MockLookup::empty();

        let result = resolve_labels(tree, &alias_map, &lookup, true);

        assert!(result.is_err());
        let error = result.unwrap_err();
        assert!(error.message.contains("Unknown Label"));
    }

    #[test]
    fn test_resolve_labels_value_wrapper() {
        let tree = serde_json::json!({
            "category": [{"_value": "Category A"}]
        });
        let alias_map = alias_map_of(&[("category", "test-collection")]);
        let lookup = MockLookup::with_collection("test-collection", &[("category a", "uuid-a")]);

        let resolved = resolve_labels(tree, &alias_map, &lookup, false).unwrap();

        assert_eq!(resolved["category"][0]["_value"], "uuid-a");
    }
}