Skip to main content

components_rs/context/
expand.rs

1use serde::{Deserialize, Serialize};
2use std::collections::HashMap;
3
4use crate::error::{ComponentsJsError, Result};
5
6/// A resolved JSON-LD context that maps short terms to full IRIs.
7#[derive(Debug, Clone, Default)]
8pub struct ContextResolver {
9    /// @vocab — default IRI prefix for unmapped terms
10    pub vocab: Option<String>,
11    /// prefix:suffix mappings (e.g., "oo" -> "https://...#")
12    pub prefixes: HashMap<String, String>,
13    /// Direct term mappings (e.g., "Class" -> TermDef { iri, type_coercion })
14    pub terms: HashMap<String, TermDef>,
15}
16
/// Definition of a single term taken from a JSON-LD context.
#[derive(Debug, Clone)]
pub struct TermDef {
    /// The `@id` the term maps to, stored exactly as written in the context
    /// (it may still be a compact form such as "oo:Class").
    pub iri: String,
    /// The string value of `@type` in the term definition, if present.
    pub type_coercion: Option<String>,
    /// The string value of `@container` in the term definition, if present.
    pub container: Option<String>,
}
23
24impl ContextResolver {
25    pub fn new() -> Self {
26        Self::default()
27    }
28
29    /// Parse a JSON-LD @context value and build the resolver.
30    /// `known_contexts` maps context IRIs to their parsed JSON content (from ModuleState.contexts).
31    pub fn from_context_value(
32        context_value: &serde_json::Value,
33        known_contexts: &HashMap<String, serde_json::Value>,
34    ) -> Result<Self> {
35        let mut resolver = Self::new();
36        resolver.load_context_value(context_value, known_contexts)?;
37        Ok(resolver)
38    }
39
40    fn load_context_value(
41        &mut self,
42        value: &serde_json::Value,
43        known_contexts: &HashMap<String, serde_json::Value>,
44    ) -> Result<()> {
45        match value {
46            serde_json::Value::Array(arr) => {
47                for item in arr {
48                    self.load_context_value(item, known_contexts)?;
49                }
50            }
51            serde_json::Value::String(url) => {
52                // Look up the context by URL in known_contexts
53                if let Some(ctx_doc) = known_contexts.get(url.as_str()) {
54                    // A context document may have a @context key itself
55                    if let Some(inner) = ctx_doc.get("@context") {
56                        self.load_context_value(inner, known_contexts)?;
57                    } else {
58                        // The document IS the context object
59                        self.load_context_object(ctx_doc)?;
60                    }
61                } else {
62                    tracing::warn!("Unknown context URL: {url} — skipping");
63                }
64            }
65            serde_json::Value::Object(_) => {
66                self.load_context_object(value)?;
67            }
68            _ => {}
69        }
70        Ok(())
71    }
72
73    fn load_context_object(&mut self, obj: &serde_json::Value) -> Result<()> {
74        let map = obj
75            .as_object()
76            .ok_or_else(|| ComponentsJsError::ContextResolution("Expected object".into()))?;
77
78        for (key, val) in map {
79            match key.as_str() {
80                "@vocab" => {
81                    if let Some(s) = val.as_str() {
82                        self.vocab = Some(s.to_string());
83                    }
84                }
85                k if k.starts_with('@') => {
86                    // Skip other JSON-LD keywords
87                }
88                _ => match val {
89                    serde_json::Value::String(iri) => {
90                        // Could be a prefix (ends with / or #) or a direct term mapping
91                        if iri.ends_with('/') || iri.ends_with('#') || iri.ends_with(':') {
92                            self.prefixes.insert(key.clone(), iri.clone());
93                        } else {
94                            self.terms.insert(
95                                key.clone(),
96                                TermDef {
97                                    iri: iri.clone(),
98                                    type_coercion: None,
99                                    container: None,
100                                },
101                            );
102                        }
103                    }
104                    serde_json::Value::Object(def) => {
105                        if let Some(id) = def.get("@id").and_then(|v| v.as_str()) {
106                            let type_coercion =
107                                def.get("@type").and_then(|v| v.as_str()).map(String::from);
108                            let container = def
109                                .get("@container")
110                                .and_then(|v| v.as_str())
111                                .map(String::from);
112                            self.terms.insert(
113                                key.clone(),
114                                TermDef {
115                                    iri: id.to_string(),
116                                    type_coercion,
117                                    container,
118                                },
119                            );
120                        }
121                    }
122                    _ => {}
123                },
124            }
125        }
126        Ok(())
127    }
128
129    /// Expand a compacted term to a full IRI.
130    /// E.g., "Class" -> "oo:Class" -> "https://...#Class"
131    /// Or "oo:Class" -> "https://...#Class"
132    /// Handles chained prefixes like "clv:x" -> "npmd:pkg/x" -> "https://.../pkg/x"
133    pub fn expand_term(&self, term: &str) -> String {
134        self.expand_term_depth(term, 0)
135    }
136
137    fn expand_term_depth(&self, term: &str, depth: usize) -> String {
138        if depth > 10 {
139            return term.to_string();
140        }
141
142        // 1. Check direct term mapping
143        if let Some(def) = self.terms.get(term) {
144            return self.expand_term_depth(&def.iri, depth + 1);
145        }
146
147        // 2. Check prefix:suffix
148        if let Some((prefix, suffix)) = term.split_once(':') {
149            if !suffix.starts_with("//") {
150                if let Some(base) = self.prefixes.get(prefix) {
151                    let expanded_base = self.expand_term_depth(base, depth + 1);
152                    return format!("{expanded_base}{suffix}");
153                }
154            }
155        }
156
157        // 3. If it already looks like a full IRI, return as-is
158        if term.contains("://") {
159            return term.to_string();
160        }
161
162        // 4. Apply @vocab
163        if let Some(vocab) = &self.vocab {
164            return format!("{vocab}{term}");
165        }
166
167        term.to_string()
168    }
169
170    /// Compact a full IRI back to a prefixed form.
171    /// Returns the shortest representation: tries exact term matches first,
172    /// then prefix matches, then @vocab, falling back to the original IRI.
173    pub fn compact_iri(&self, iri: &str) -> String {
174        // 1. Check exact reverse term mapping (full IRI → short term)
175        for (term, def) in &self.terms {
176            let expanded = self.expand_term(&def.iri);
177            if expanded == iri {
178                return term.clone();
179            }
180        }
181
182        // 2. Find best prefix match (longest base IRI wins → shortest suffix)
183        let mut best: Option<(String, usize)> = None; // (compact form, prefix base len)
184        for (prefix, base_iri) in &self.prefixes {
185            let expanded_base = self.expand_term(base_iri);
186            if let Some(suffix) = iri.strip_prefix(expanded_base.as_str()) {
187                let base_len = expanded_base.len();
188                if best.as_ref().is_none_or(|(_, bl)| base_len > *bl) {
189                    best = Some((format!("{prefix}:{suffix}"), base_len));
190                }
191            }
192        }
193        if let Some((compact, _)) = best {
194            return compact;
195        }
196
197        // 3. Try @vocab
198        if let Some(vocab) = &self.vocab {
199            if let Some(suffix) = iri.strip_prefix(vocab.as_str()) {
200                if !suffix.contains('/') && !suffix.contains('#') {
201                    return suffix.to_string();
202                }
203            }
204        }
205
206        iri.to_string()
207    }
208}
209
/// Bidirectional IRI translator covering the whole project.
///
/// It is built once from the merge of every known context and then answers both
/// directions: expansion (compact → full IRI) and compaction (full IRI → compact form).
#[derive(Debug, Clone, Default)]
pub struct IriCompactor {
    /// Prefix table as (prefix name, fully expanded base IRI) pairs.
    prefixes: Vec<(String, String)>,
    /// Direct term table as (short term, fully expanded IRI) pairs.
    terms: Vec<(String, String)>,
    /// The merged `@vocab` value, when one was declared.
    vocab: Option<String>,
}
223
224impl IriCompactor {
225    /// Build a project-wide compactor from all known contexts.
226    pub fn from_contexts(known_contexts: &HashMap<String, serde_json::Value>) -> Result<Self> {
227        // Build a single merged ContextResolver from all contexts
228        let mut resolver = ContextResolver::new();
229        for ctx_doc in known_contexts.values() {
230            if let Some(inner) = ctx_doc.get("@context") {
231                resolver.load_context_value(inner, known_contexts)?;
232            } else {
233                resolver.load_context_object(ctx_doc)?;
234            }
235        }
236
237        // Pre-expand all prefixes and terms for fast lookup
238        let mut prefixes: Vec<(String, String)> = resolver
239            .prefixes
240            .iter()
241            .map(|(name, base)| {
242                let expanded = resolver.expand_term(base);
243                (name.clone(), expanded)
244            })
245            .collect();
246        // Sort by expanded base length descending (longest match first)
247        prefixes.sort_by(|a, b| b.1.len().cmp(&a.1.len()));
248
249        let terms: Vec<(String, String)> = resolver
250            .terms
251            .iter()
252            .map(|(name, def)| {
253                let expanded = resolver.expand_term(&def.iri);
254                (name.clone(), expanded)
255            })
256            .collect();
257
258        Ok(Self {
259            prefixes,
260            terms,
261            vocab: resolver.vocab,
262        })
263    }
264
265    /// Compact a full IRI to its shortest prefixed form.
266    pub fn compact(&self, iri: &str) -> String {
267        // 1. Exact term match
268        for (term, expanded) in &self.terms {
269            if expanded == iri {
270                return term.clone();
271            }
272        }
273
274        // 2. Best prefix match (already sorted longest-first)
275        for (prefix, base) in &self.prefixes {
276            if let Some(suffix) = iri.strip_prefix(base.as_str()) {
277                return format!("{prefix}:{suffix}");
278            }
279        }
280
281        // 3. @vocab
282        if let Some(vocab) = &self.vocab {
283            if let Some(suffix) = iri.strip_prefix(vocab.as_str()) {
284                if !suffix.contains('/') && !suffix.contains('#') {
285                    return suffix.to_string();
286                }
287            }
288        }
289
290        iri.to_string()
291    }
292
293    /// Expand a compact term to a full IRI.
294    pub fn expand(&self, term: &str) -> String {
295        // 1. Direct term match
296        for (name, expanded) in &self.terms {
297            if name == term {
298                return expanded.clone();
299            }
300        }
301
302        // 2. prefix:suffix
303        if let Some((prefix, suffix)) = term.split_once(':') {
304            if !suffix.starts_with("//") {
305                for (name, base) in &self.prefixes {
306                    if name == prefix {
307                        return format!("{base}{suffix}");
308                    }
309                }
310            }
311        }
312
313        // 3. Already a full IRI
314        if term.contains("://") {
315            return term.to_string();
316        }
317
318        // 4. @vocab
319        if let Some(vocab) = &self.vocab {
320            return format!("{vocab}{term}");
321        }
322
323        term.to_string()
324    }
325}
326
327/// An expanded JSON-LD node with full IRIs as keys.
328#[derive(Debug, Clone, Serialize, Deserialize)]
329pub struct ExpandedNode {
330    pub id: Option<String>,
331    pub types: Vec<String>,
332    pub properties: HashMap<String, Vec<serde_json::Value>>,
333}
334
335/// Extract the @graph entries from a JSON-LD document, expanding all terms.
336pub fn extract_graph_nodes(
337    doc: &serde_json::Value,
338    known_contexts: &HashMap<String, serde_json::Value>,
339) -> Result<Vec<ExpandedNode>> {
340    // Build the context resolver from the document's @context
341    let resolver = if let Some(ctx) = doc.get("@context") {
342        ContextResolver::from_context_value(ctx, known_contexts)?
343    } else {
344        ContextResolver::new()
345    };
346
347    // Get graph entries: either @graph array or the document itself
348    let entries: Vec<&serde_json::Value> = if let Some(graph) = doc.get("@graph") {
349        if let Some(arr) = graph.as_array() {
350            arr.iter().collect()
351        } else {
352            vec![graph]
353        }
354    } else if doc.get("@id").is_some() || doc.get("@type").is_some() {
355        // The document itself is a node
356        vec![doc]
357    } else {
358        vec![]
359    };
360
361    let mut nodes = Vec::new();
362    for entry in entries {
363        if let Some(node) = expand_node(entry, &resolver) {
364            nodes.push(node);
365        }
366    }
367    Ok(nodes)
368}
369
370fn expand_node(value: &serde_json::Value, resolver: &ContextResolver) -> Option<ExpandedNode> {
371    let obj = value.as_object()?;
372
373    let id = obj.get("@id").and_then(|v| v.as_str()).map(|s| resolver.expand_term(s));
374
375    let types: Vec<String> = match obj.get("@type") {
376        Some(serde_json::Value::String(t)) => vec![resolver.expand_term(t)],
377        Some(serde_json::Value::Array(arr)) => arr
378            .iter()
379            .filter_map(|v| v.as_str())
380            .map(|s| resolver.expand_term(s))
381            .collect(),
382        _ => vec![],
383    };
384
385    let mut properties = HashMap::new();
386    for (key, val) in obj {
387        if key.starts_with('@') {
388            continue;
389        }
390        let expanded_key = resolver.expand_term(key);
391        let values = normalize_to_array(val);
392        properties.insert(expanded_key, values);
393    }
394
395    Some(ExpandedNode {
396        id,
397        types,
398        properties,
399    })
400}
401
402fn normalize_to_array(value: &serde_json::Value) -> Vec<serde_json::Value> {
403    match value {
404        serde_json::Value::Array(arr) => arr.clone(),
405        other => vec![other.clone()],
406    }
407}
408
#[cfg(test)]
mod tests {
    use super::*;

    /// IRI under which the Components.js context fixture is registered.
    const CJS_CONTEXT_URL: &str = "https://linkedsoftwaredependencies.org/bundles/npm/componentsjs/^4.0.0/components/context.jsonld";
    /// Namespace of the object-oriented vocabulary used by the fixture.
    const OO_NS: &str = "https://linkedsoftwaredependencies.org/vocabularies/object-oriented#";

    /// Known-contexts map holding a trimmed-down Components.js context.
    fn make_cjs_context() -> HashMap<String, serde_json::Value> {
        let ctx_json: serde_json::Value = serde_json::json!({
            "@context": {
                "oo": "https://linkedsoftwaredependencies.org/vocabularies/object-oriented#",
                "Module": { "@id": "oo:Module" },
                "Class": { "@id": "oo:Class" },
                "AbstractClass": { "@id": "oo:AbstractClass" },
                "components": { "@id": "oo:component" },
                "parameters": { "@id": "oo:parameter" },
                "extends": { "@id": "rdfs:subClassOf", "@type": "@id" },
                "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
                "doap": "http://usefulinc.com/ns/doap#",
                "requireName": { "@id": "doap:name" },
                "requireElement": { "@id": "oo:componentPath" },
                "import": { "@id": "rdfs:seeAlso", "@type": "@id" }
            }
        });
        HashMap::from([(CJS_CONTEXT_URL.to_string(), ctx_json)])
    }

    /// Resolver for `ctx`, resolved against the Components.js fixture.
    fn resolver_for(ctx: &serde_json::Value) -> ContextResolver {
        ContextResolver::from_context_value(ctx, &make_cjs_context()).unwrap()
    }

    /// Resolver that only references the Components.js fixture by URL.
    fn cjs_resolver() -> ContextResolver {
        resolver_for(&serde_json::json!([CJS_CONTEXT_URL]))
    }

    #[test]
    fn test_expand_term_direct_mapping() {
        let resolver = cjs_resolver();

        assert_eq!(resolver.expand_term("Class"), format!("{OO_NS}Class"));
        assert_eq!(resolver.expand_term("Module"), format!("{OO_NS}Module"));
    }

    #[test]
    fn test_expand_term_prefix() {
        let resolver = cjs_resolver();

        assert_eq!(resolver.expand_term("oo:Class"), format!("{OO_NS}Class"));
    }

    #[test]
    fn test_expand_term_with_local_context() {
        let resolver = resolver_for(&serde_json::json!([
            CJS_CONTEXT_URL,
            {
                "ex": "http://example.org/",
                "hello": "http://example.org/hello/"
            }
        ]));

        assert_eq!(resolver.expand_term("ex:MyModule"), "http://example.org/MyModule");
        assert_eq!(resolver.expand_term("hello:say"), "http://example.org/hello/say");
    }

    #[test]
    fn test_extract_graph_nodes() {
        let known = make_cjs_context();
        let doc = serde_json::json!({
            "@context": [
                CJS_CONTEXT_URL,
                { "ex": "http://example.org/", "hello": "http://example.org/hello/" }
            ],
            "@graph": [
                {
                    "@id": "ex:HelloWorldModule",
                    "@type": "Module",
                    "requireName": "helloworld",
                    "components": [
                        {
                            "@id": "ex:HelloWorldModule#SayHelloComponent",
                            "@type": "Class",
                            "requireElement": "Hello",
                            "parameters": [
                                { "@id": "hello:say" },
                                { "@id": "hello:hello" }
                            ]
                        }
                    ]
                }
            ]
        });

        let nodes = extract_graph_nodes(&doc, &known).unwrap();
        assert_eq!(nodes.len(), 1);

        let module = &nodes[0];
        assert_eq!(module.id.as_deref(), Some("http://example.org/HelloWorldModule"));
        assert_eq!(module.types, vec![format!("{OO_NS}Module")]);
    }

    #[test]
    fn test_vocab_expansion() {
        let no_known_contexts = HashMap::new();
        let ctx = serde_json::json!({
            "@vocab": "https://linkedsoftwaredependencies.org/vocabularies/object-oriented#",
            "ex": "http://example.org/"
        });
        let resolver = ContextResolver::from_context_value(&ctx, &no_known_contexts).unwrap();

        assert_eq!(
            resolver.expand_term("SomeUnknownTerm"),
            format!("{OO_NS}SomeUnknownTerm")
        );
    }

    #[test]
    fn test_compact_iri_term() {
        let resolver = cjs_resolver();

        // A direct term ("Class") beats the longer prefixed form ("oo:Class").
        assert_eq!(resolver.compact_iri(&format!("{OO_NS}Class")), "Class");
        // No term shortcut exists for this IRI, so the prefix form is used.
        assert_eq!(
            resolver.compact_iri("http://www.w3.org/2000/01/rdf-schema#label"),
            "rdfs:label"
        );
    }

    #[test]
    fn test_compact_iri_prefix() {
        let resolver = resolver_for(&serde_json::json!([
            CJS_CONTEXT_URL,
            { "ex": "http://example.org/" }
        ]));

        assert_eq!(resolver.compact_iri("http://example.org/Foo"), "ex:Foo");
    }

    #[test]
    fn test_compact_iri_unknown() {
        let resolver = cjs_resolver();

        // An IRI outside every known namespace comes back untouched.
        let unknown = "https://unknown.example.org/Something";
        assert_eq!(resolver.compact_iri(unknown), unknown);
    }

    #[test]
    fn test_iri_compactor_roundtrip() {
        let compactor = IriCompactor::from_contexts(&make_cjs_context()).unwrap();

        // "Class" is a direct term, so it is the shortest form.
        let class_iri = format!("{OO_NS}Class");
        let class_compact = compactor.compact(&class_iri);
        assert_eq!(class_compact, "Class");
        assert_eq!(compactor.expand(&class_compact), class_iri);

        // "extends" is the term for rdfs:subClassOf.
        let sub_class_iri = "http://www.w3.org/2000/01/rdf-schema#subClassOf";
        let sub_class_compact = compactor.compact(sub_class_iri);
        assert_eq!(sub_class_compact, "extends");
        assert_eq!(compactor.expand(&sub_class_compact), sub_class_iri);
    }

    #[test]
    fn test_iri_compactor_expand() {
        let compactor = IriCompactor::from_contexts(&make_cjs_context()).unwrap();

        assert_eq!(compactor.expand("oo:Module"), format!("{OO_NS}Module"));
    }
}
617            compactor.expand("oo:Module"),
618            "https://linkedsoftwaredependencies.org/vocabularies/object-oriented#Module"
619        );
620    }
621}