Skip to main content

components_rs/components/
registry.rs

1//! Component registry: loads and merges all `components.jsonld` files.
2//!
3//! See the [`components`](crate::components) module doc for the two-phase loading overview.
4//!
5//! ## Span extraction
6//!
7//! Every file is parsed with [`rdf_parsers::jsonld::convert::parse_json`] before any processing,
8//! which yields a [`JsonLdVal`] tree carrying byte-range spans for every token.
9//! [`collect_id_spans`] walks the tree and records `expanded_iri → byte_range` in a flat map.
10//! These spans are threaded through both collection phases so that every
11//! [`CjsModule`](crate::components::types::CjsModule),
12//! [`CjsComponent`](crate::components::types::CjsComponent), and
13//! [`CjsParameter`](crate::components::types::CjsParameter) ends up with an `iri_span` that
14//! the LSP can use for goto-definition without re-reading the source file.
15
16use std::collections::HashMap;
17use std::ops::Range;
18
19use rdf_parsers::jsonld::convert::{parse_json, JsonLdVal};
20use url::Url;
21
22use crate::components::types::*;
23use crate::context::expand::{self, ContextResolver, ExpandedNode};
24use crate::error::Result;
25use crate::fs::{self as cfs, Fs};
26use crate::module_state::ModuleState;
27
28// ── Span helpers ─────────────────────────────────────────────────────────────
29
30/// Walk a `JsonLdVal` tree and collect the byte spans of every `@id` value,
31/// keyed by the **expanded** IRI (resolved via `resolver`).
32///
33/// A span entry maps `expanded_iri → byte_range_of_@id_value_in_source`.
34pub fn collect_id_spans(
35    val: &JsonLdVal,
36    resolver: &ContextResolver,
37    out: &mut HashMap<String, Range<usize>>,
38) {
39    match val {
40        JsonLdVal::Object(members, _) => {
41            for (key, _key_span, val_span, value) in members {
42                if key == "@id" {
43                    if let Some(s) = value.as_str() {
44                        let expanded = resolver.expand_term(s);
45                        out.entry(expanded).or_insert_with(|| val_span.clone());
46                    }
47                }
48                collect_id_spans(value, resolver, out);
49            }
50        }
51        JsonLdVal::Array(items) => {
52            for (item, _) in items {
53                collect_id_spans(item, resolver, out);
54            }
55        }
56        _ => {}
57    }
58}
59
60/// Walk a `JsonLdVal` tree and record the source file for every `@id` value
61/// (first-seen wins, consistent with [`collect_id_spans`]).
62///
63/// The resulting map is used to determine which file a component's `@id` was
64/// originally defined in, even when the component is later merged into a
65/// parent node (e.g., the module node in `components.jsonld`).
66pub fn collect_id_sources(
67    val: &JsonLdVal,
68    resolver: &ContextResolver,
69    source_file: &str,
70    out: &mut HashMap<String, String>,
71) {
72    match val {
73        JsonLdVal::Object(members, _) => {
74            for (key, _, _, value) in members {
75                if key == "@id" {
76                    if let Some(s) = value.as_str() {
77                        let expanded = resolver.expand_term(s);
78                        out.entry(expanded)
79                            .or_insert_with(|| source_file.to_string());
80                    }
81                }
82                collect_id_sources(value, resolver, source_file, out);
83            }
84        }
85        JsonLdVal::Array(items) => {
86            for (item, _) in items {
87                collect_id_sources(item, resolver, source_file, out);
88            }
89        }
90        _ => {}
91    }
92}
93
94// ── Registry ─────────────────────────────────────────────────────────────────
95
96/// Registry of all discovered CJS components and modules.
97///
98/// Populated by [`register_available_modules`](Self::register_available_modules)
99/// (two-phase: collect → merge → process) and then finalised with
100/// [`finalize`](Self::finalize) which resolves inherited parameters.
101///
102/// Primary LSP uses:
103/// - **Completion** — iterate `components` to offer `@type` values
104/// - **Hover** — look up a component by IRI to get `comment` and parameter list
105/// - **Goto-definition** — `CjsComponent::iri_span` + `CjsModule::source_file`
106///   give the exact location in the components file
107#[derive(Debug, Clone)]
108pub struct ComponentRegistry {
109    /// All components indexed by their fully expanded IRI.
110    pub components: HashMap<String, CjsComponent>,
111    /// All modules indexed by their fully expanded IRI.
112    pub modules: HashMap<String, CjsModule>,
113    /// All parameters indexed by their fully expanded IRI, pointing to
114    /// `(source_file, iri_span)` for goto-definition without searching every file.
115    pub parameters: HashMap<String, (String, Range<usize>)>,
116    /// Raw source text of every component file that was loaded, keyed by the
117    /// absolute file URL (same strings used in `CjsComponent::source_file` and
118    /// `CjsModule::source_file`).  Used by the LSP to convert `iri_span` byte
119    /// offsets to LSP line/column positions without re-reading files from disk.
120    pub file_sources: HashMap<String, String>,
121}
122
/// Intermediate node collected during phase 1, before merging by `@id`.
///
/// One `CollectedNode` exists per distinct node IRI. When the same IRI shows
/// up in several files, the entry created for the first occurrence is reused
/// and only its `types` and `properties` grow.
///
/// Stores the context resolver from the file where this node was first seen so
/// that inline component `@id` strings can be expanded later during phase 2
/// without re-reading the source file.
#[derive(Debug, Clone)]
struct CollectedNode {
    /// IRI of the node; also the key under which it is stored while merging.
    id: String,
    /// Type IRIs gathered from every occurrence of the node, without duplicates.
    types: Vec<String>,
    /// Property values keyed by property IRI; values from every occurrence of
    /// the node are appended in the order the files were processed.
    properties: HashMap<String, Vec<JsonLdVal>>,
    /// URL (as a string) of the file in which the node was first collected.
    source_file: String,
    /// Byte span of the `@id` value in `source_file`.
    id_span: Range<usize>,
    /// Context resolver active in `source_file`; kept so that compact IRIs
    /// inside this node's property values can be expanded during phase 2.
    resolver: ContextResolver,
}
140
141impl ComponentRegistry {
142    pub fn new() -> Self {
143        Self {
144            components: HashMap::new(),
145            modules: HashMap::new(),
146            parameters: HashMap::new(),
147            file_sources: HashMap::new(),
148        }
149    }
150
151    /// Discover and register all modules reachable from the module state.
152    ///
153    /// **Phase 1** — recursively loads every `components.jsonld` file (following
154    /// `rdfs:seeAlso` imports), parses each with `JsonLdVal` to harvest `@id`
155    /// byte spans, then merges all nodes by IRI into `all_nodes`.
156    ///
157    /// **Phase 2** — walks `all_nodes` to find `oo:Module` nodes and extracts
158    /// their inline component definitions into `CjsModule`/`CjsComponent`.
159    pub async fn register_available_modules(
160        &mut self,
161        fs: &dyn Fs,
162        state: &ModuleState,
163    ) -> Result<()> {
164        let mut all_nodes: HashMap<String, CollectedNode> = HashMap::new();
165        let mut visited_files: std::collections::HashSet<Url> =
166            std::collections::HashSet::new();
167        let mut id_spans: HashMap<String, Range<usize>> = HashMap::new();
168        let mut id_source_files: HashMap<String, String> = HashMap::new();
169        let mut file_sources: HashMap<String, String> = HashMap::new();
170        // Cache resolved ContextResolvers keyed by the @context value so files
171        // sharing the same context IRI don't rebuild the resolver from scratch.
172        let mut resolver_cache: HashMap<String, ContextResolver> = HashMap::new();
173
174        for version_map in state.component_modules.values() {
175            for component_url in version_map.values() {
176                if cfs::exists(fs, component_url).await {
177                    self.collect_nodes_from_file(
178                        fs,
179                        component_url,
180                        state,
181                        &mut all_nodes,
182                        &mut visited_files,
183                        &mut id_spans,
184                        &mut id_source_files,
185                        &mut file_sources,
186                        &mut resolver_cache,
187                    )
188                    .await?;
189                } else {
190                    tracing::warn!(
191                        "Component file does not exist: {}",
192                        component_url.as_str()
193                    );
194                }
195            }
196        }
197
198        tracing::info!(
199            "Collected {} unique nodes from component files",
200            all_nodes.len()
201        );
202
203        self.process_merged_nodes(&all_nodes, &id_spans, &id_source_files, state)?;
204        self.file_sources = file_sources;
205
206        Ok(())
207    }
208
    /// Recursively load a component file and its `rdfs:seeAlso` imports.
    ///
    /// For the file at `url` this:
    /// 1. skips it if it is already in `visited`, otherwise marks it visited;
    /// 2. reads and parses it (a parse failure is logged and treated as an
    ///    empty file; a read failure is returned as an error);
    /// 3. obtains a `ContextResolver` for its `@context`, reusing
    ///    `resolver_cache` when the same context value was seen before;
    /// 4. records `@id` spans and `@id` source files, and stores the raw text
    ///    in `file_sources`;
    /// 5. merges every graph node that has an `@id` into `all_nodes`;
    /// 6. follows the file's imports via `process_imports_collect`.
    ///
    /// The future is boxed because this function and
    /// `process_imports_collect` call each other recursively.
    fn collect_nodes_from_file<'a>(
        &'a self,
        fs: &'a dyn Fs,
        url: &'a Url,
        state: &'a ModuleState,
        all_nodes: &'a mut HashMap<String, CollectedNode>,
        visited: &'a mut std::collections::HashSet<Url>,
        id_spans: &'a mut HashMap<String, Range<usize>>,
        id_source_files: &'a mut HashMap<String, String>,
        file_sources: &'a mut HashMap<String, String>,
        resolver_cache: &'a mut HashMap<String, ContextResolver>,
    ) -> std::pin::Pin<Box<dyn std::future::Future<Output = Result<()>> + 'a + Send>> {
        Box::pin(async move {
            // Each file is processed at most once; this also terminates
            // circular import chains.
            if visited.contains(url) {
                return Ok(());
            }
            visited.insert(url.clone());

            tracing::debug!("Loading component file: {}", url.as_str());

            let contents = fs.read_to_string(url).await?;
            // An unparsable file is not fatal: warn and carry on with the rest.
            let Some(doc) = parse_json(&contents) else {
                tracing::warn!("Failed to parse component file: {}", url.as_str());
                return Ok(());
            };

            // Build (or reuse) the resolver for this file's `@context`; a file
            // without `@context` gets an empty resolver.
            let resolver = if let Some(ctx) = doc.get("@context") {
                let key = context_cache_key(ctx);
                if let Some(cached) = resolver_cache.get(&key) {
                    cached.clone()
                } else {
                    let r = ContextResolver::from_context_value(ctx, &state.contexts)?;
                    resolver_cache.insert(key, r.clone());
                    r
                }
            } else {
                ContextResolver::new()
            };

            // Spans must be collected before the merge loop below, which looks
            // them up per node.
            collect_id_spans(&doc, &resolver, id_spans);

            let nodes = expand::extract_graph_nodes(&doc, &state.contexts)?;
            let source = url.to_string();

            collect_id_sources(&doc, &resolver, &source, id_source_files);
            // Keep the raw text, keyed by the same string used as `source_file`.
            file_sources.insert(source.clone(), contents);

            for node in &nodes {
                // Nodes without an `@id` cannot be merged by IRI and are skipped.
                if let Some(id) = &node.id {
                    // `0..0` is the placeholder when no span was recorded.
                    // NOTE(review): `id_spans` is shared across files and keeps
                    // the first span seen for an IRI, so this span may have been
                    // recorded in a different file than `source` — confirm that
                    // `id_span` and `source_file` always refer to the same file.
                    let span = id_spans.get(id).cloned().unwrap_or(0..0);
                    // First occurrence fixes `source_file`, `id_span` and
                    // `resolver`; later occurrences only add types/properties.
                    let entry = all_nodes
                        .entry(id.clone())
                        .or_insert_with(|| CollectedNode {
                            id: id.clone(),
                            types: Vec::new(),
                            properties: HashMap::new(),
                            source_file: source.clone(),
                            id_span: span,
                            resolver: resolver.clone(),
                        });
                    // Union of types, preserving first-seen order.
                    for t in &node.types {
                        if !entry.types.contains(t) {
                            entry.types.push(t.clone());
                        }
                    }
                    // Property values are appended, not replaced.
                    for (key, vals) in &node.properties {
                        entry
                            .properties
                            .entry(key.clone())
                            .or_default()
                            .extend(vals.clone());
                    }
                }
            }

            // Recurse into whatever this file imports.
            self.process_imports_collect(
                fs,
                &doc,
                &nodes,
                &resolver,
                state,
                all_nodes,
                visited,
                id_spans,
                id_source_files,
                file_sources,
                resolver_cache,
            )
            .await?;

            Ok(())
        })
    }
303
304    /// Follow `import` / `rdfs:seeAlso` IRIs and recursively collect nodes.
305    fn process_imports_collect<'a>(
306        &'a self,
307        fs: &'a dyn Fs,
308        doc: &'a JsonLdVal,
309        nodes: &'a [ExpandedNode],
310        resolver: &'a ContextResolver,
311        state: &'a ModuleState,
312        all_nodes: &'a mut HashMap<String, CollectedNode>,
313        visited: &'a mut std::collections::HashSet<Url>,
314        id_spans: &'a mut HashMap<String, Range<usize>>,
315        id_source_files: &'a mut HashMap<String, String>,
316        file_sources: &'a mut HashMap<String, String>,
317        resolver_cache: &'a mut HashMap<String, ContextResolver>,
318    ) -> std::pin::Pin<Box<dyn std::future::Future<Output = Result<()>> + 'a + Send>> {
319        Box::pin(async move {
320            let mut import_iris = Vec::new();
321
322            if let Some(import_val) = doc.get("import") {
323                collect_import_iris(import_val, resolver, &mut import_iris);
324            }
325
326            for node in nodes {
327                if let Some(imports) = node.properties.get(IRI_RDFS_SEE_ALSO) {
328                    for import_val in imports {
329                        collect_import_iris(import_val, resolver, &mut import_iris);
330                    }
331                }
332            }
333
334            for iri in import_iris {
335                if let Some(local_url) = resolve_iri_to_url(&iri, &state.import_paths) {
336                    if cfs::exists(fs, &local_url).await {
337                        self.collect_nodes_from_file(
338                            fs,
339                            &local_url,
340                            state,
341                            all_nodes,
342                            visited,
343                            id_spans,
344                            id_source_files,
345                            file_sources,
346                            resolver_cache,
347                        )
348                        .await?;
349                    }
350                }
351            }
352
353            Ok(())
354        })
355    }
356
357    fn process_merged_nodes(
358        &mut self,
359        all_nodes: &HashMap<String, CollectedNode>,
360        id_spans: &HashMap<String, Range<usize>>,
361        id_source_files: &HashMap<String, String>,
362        _state: &ModuleState,
363    ) -> Result<()> {
364        for node in all_nodes.values() {
365            if node.types.contains(&IRI_MODULE.to_string()) {
366                self.register_module_from_merged(node, all_nodes, id_spans, id_source_files)?;
367            }
368        }
369        Ok(())
370    }
371
372    fn register_module_from_merged(
373        &mut self,
374        node: &CollectedNode,
375        _all_nodes: &HashMap<String, CollectedNode>,
376        id_spans: &HashMap<String, Range<usize>>,
377        id_source_files: &HashMap<String, String>,
378    ) -> Result<()> {
379        let require_name = node
380            .properties
381            .get(IRI_DOAP_NAME)
382            .and_then(|v| v.first())
383            .and_then(|v| v.as_str())
384            .map(String::from);
385
386        let mut components = Vec::new();
387
388        if let Some(component_vals) = node.properties.get(IRI_COMPONENT) {
389            for comp_val in component_vals {
390                if let Some(comp) = self.parse_component(
391                    comp_val,
392                    &node.id,
393                    &node.resolver,
394                    id_spans,
395                    id_source_files,
396                    &node.source_file,
397                ) {
398                    self.components.insert(comp.iri.clone(), comp.clone());
399                    components.push(comp);
400                }
401            }
402        }
403
404        let module = CjsModule {
405            iri: node.id.clone(),
406            require_name,
407            components,
408            source_file: node.source_file.clone(),
409            iri_span: node.id_span.clone(),
410        };
411
412        self.modules.insert(node.id.clone(), module);
413        Ok(())
414    }
415
416    /// Extract a `CjsComponent` from a raw property-value object.
417    ///
418    /// The `resolver` comes from the file that originally defined this component
419    /// so that compact IRIs inside the object can be properly expanded. The
420    /// `id_spans` map provides the source location of the `@id` value.
421    fn parse_component(
422        &self,
423        value: &JsonLdVal,
424        module_iri: &str,
425        resolver: &ContextResolver,
426        id_spans: &HashMap<String, Range<usize>>,
427        id_source_files: &HashMap<String, String>,
428        fallback_source_file: &str,
429    ) -> Option<CjsComponent> {
430        let id_str = value.get("@id")?.as_str()?;
431        let iri = resolver.expand_term(id_str);
432        let iri_span = id_spans.get(&iri).cloned().unwrap_or(0..0);
433        // Use the file where this @id was first seen; fall back to the module file.
434        let source_file = id_source_files
435            .get(&iri)
436            .cloned()
437            .unwrap_or_else(|| fallback_source_file.to_string());
438
439        let types: Vec<String> = match value.get("@type") {
440            Some(JsonLdVal::Str(t)) => vec![resolver.expand_term(t)],
441            Some(v) => v
442                .as_array()
443                .map(|arr| {
444                    arr.iter()
445                        .filter_map(|(item, _)| item.as_str())
446                        .map(|s| resolver.expand_term(s))
447                        .collect()
448                })
449                .unwrap_or_default(),
450            None => vec![],
451        };
452
453        let component_type = ComponentType::from_type_iris(&types).or_else(|| {
454            for t in &types {
455                match t.as_str() {
456                    "Class" => return Some(ComponentType::Class),
457                    "AbstractClass" => return Some(ComponentType::AbstractClass),
458                    "Instance" => return Some(ComponentType::Instance),
459                    _ => {}
460                }
461            }
462            None
463        })?;
464
465        let require_element = value
466            .get("requireElement")
467            .or_else(|| value.get(IRI_COMPONENT_PATH))
468            .and_then(|v| v.as_str())
469            .map(String::from);
470
471        let comment = value
472            .get("comment")
473            .or_else(|| value.get(IRI_RDFS_COMMENT))
474            .and_then(|v| v.as_str())
475            .map(String::from);
476
477        let parameters =
478            self.parse_parameters(value, resolver, id_spans, id_source_files, &source_file);
479
480        let extends: Vec<String> = match value
481            .get("extends")
482            .or_else(|| value.get(IRI_RDFS_SUBCLASS_OF))
483        {
484            Some(JsonLdVal::Str(s)) => vec![resolver.expand_term(s)],
485            Some(v) if v.as_array().is_some() => v
486                .as_array()
487                .unwrap()
488                .iter()
489                .filter_map(|(item, _)| match item {
490                    JsonLdVal::Str(s) => Some(resolver.expand_term(s)),
491                    _ => item.get("@id")?.as_str().map(|s| resolver.expand_term(s)),
492                })
493                .collect(),
494            Some(v) => v
495                .get("@id")
496                .and_then(|v| v.as_str())
497                .map(|s| resolver.expand_term(s))
498                .into_iter()
499                .collect(),
500            None => vec![],
501        };
502
503        let constructor_arguments = value
504            .get("constructorArguments")
505            .or_else(|| value.get(IRI_CONSTRUCTOR_ARGUMENTS))
506            .cloned();
507
508        Some(CjsComponent {
509            iri,
510            component_type,
511            require_element,
512            comment,
513            parameters,
514            extends,
515            constructor_arguments,
516            module_iri: Some(module_iri.to_string()),
517            source_file,
518            iri_span,
519        })
520    }
521
522    /// Extract `CjsParameter`s from a component value.
523    fn parse_parameters(
524        &self,
525        value: &JsonLdVal,
526        resolver: &ContextResolver,
527        id_spans: &HashMap<String, Range<usize>>,
528        id_source_files: &HashMap<String, String>,
529        fallback_source_file: &str,
530    ) -> Vec<CjsParameter> {
531        let params = match value.get("parameters").or_else(|| value.get(IRI_PARAMETER)) {
532            Some(v) => v,
533            None => return vec![],
534        };
535
536        let arr = match params.as_array() {
537            Some(a) => a,
538            None => return vec![],
539        };
540
541        arr.iter()
542            .filter_map(|(p, _)| {
543                let id_str = p.get("@id")?.as_str()?;
544                let iri = resolver.expand_term(id_str);
545                let iri_span = id_spans.get(&iri).cloned().unwrap_or(0..0);
546
547                let range =
548                    p.get("range")
549                        .or_else(|| p.get(IRI_RDFS_RANGE))
550                        .and_then(|v| match v {
551                            JsonLdVal::Str(s) => Some(resolver.expand_term(s)),
552                            _ => v.get("@id")?.as_str().map(|s| resolver.expand_term(s)),
553                        });
554                let comment = p
555                    .get("comment")
556                    .or_else(|| p.get(IRI_RDFS_COMMENT))
557                    .and_then(|v| v.as_str())
558                    .map(String::from);
559                let required = p.get("required").and_then(|v| v.as_bool()).unwrap_or(false);
560                let lazy = p.get("lazy").and_then(|v| v.as_bool()).unwrap_or(false);
561                let unique = p.get("unique").and_then(|v| v.as_bool()).unwrap_or(false);
562                let default_value = p.get("default").cloned();
563                let source_file = id_source_files
564                    .get(&iri)
565                    .cloned()
566                    .unwrap_or_else(|| fallback_source_file.to_string());
567
568                Some(CjsParameter {
569                    iri,
570                    range,
571                    comment,
572                    required,
573                    lazy,
574                    unique,
575                    default_value,
576                    source_file,
577                    iri_span,
578                })
579            })
580            .collect()
581    }
582
583    /// Resolve inheritance: walk each component's `extends` chain and merge in
584    /// any parameters not already declared on the component itself.
585    ///
586    /// Must be called after all files have been loaded. Without this step,
587    /// completion only shows parameters declared directly on a component and
588    /// misses those inherited from abstract base classes.
589    pub fn finalize(&mut self) {
590        let component_iris: Vec<String> = self.components.keys().cloned().collect();
591        for iri in component_iris {
592            let inherited_params =
593                self.collect_inherited_params(&iri, &mut std::collections::HashSet::new());
594            if let Some(comp) = self.components.get_mut(&iri) {
595                // Build a set of already-present parameter IRIs so the dedup
596                // check is O(1) instead of O(existing_params).
597                let existing: std::collections::HashSet<String> =
598                    comp.parameters.iter().map(|p| p.iri.clone()).collect();
599                for param in inherited_params {
600                    if !existing.contains(&param.iri) {
601                        comp.parameters.push(param);
602                    }
603                }
604            }
605        }
606
607        // Build flat parameter index for O(1) goto-definition lookups.
608        // Use first-seen (the defining component wins over inheritors).
609        for comp in self.components.values() {
610            for param in &comp.parameters {
611                self.parameters
612                    .entry(param.iri.clone())
613                    .or_insert_with(|| (param.source_file.clone(), param.iri_span.clone()));
614            }
615        }
616    }
617
618    fn collect_inherited_params(
619        &self,
620        iri: &str,
621        visited: &mut std::collections::HashSet<String>,
622    ) -> Vec<CjsParameter> {
623        if !visited.insert(iri.to_string()) {
624            return vec![];
625        }
626
627        let Some(comp) = self.components.get(iri) else {
628            return vec![];
629        };
630
631        let mut params = Vec::new();
632        for parent_iri in &comp.extends.clone() {
633            if let Some(parent) = self.components.get(parent_iri) {
634                params.extend(parent.parameters.clone());
635            }
636            params.extend(self.collect_inherited_params(parent_iri, visited));
637        }
638        params
639    }
640}
641
642/// Produce a stable string key for a `@context` value so resolved
643/// [`ContextResolver`]s can be cached across files that share the same context.
644fn context_cache_key(val: &JsonLdVal) -> String {
645    match val {
646        JsonLdVal::Str(s) => s.clone(),
647        JsonLdVal::Array(arr) => arr
648            .iter()
649            .map(|(v, _)| context_cache_key(v))
650            .collect::<Vec<_>>()
651            .join("\x00"),
652        _ => format!("{val:?}"),
653    }
654}
655
656fn collect_import_iris(value: &JsonLdVal, resolver: &ContextResolver, out: &mut Vec<String>) {
657    match value {
658        JsonLdVal::Str(s) => out.push(resolver.expand_term(s)),
659        _ => {
660            if let Some(arr) = value.as_array() {
661                for (item, _) in arr {
662                    if let Some(s) = item.as_str() {
663                        out.push(resolver.expand_term(s));
664                    }
665                }
666            }
667        }
668    }
669}
670
671/// Resolve an IRI to a local file URL using the import_paths mapping.
672pub fn resolve_iri_to_url(
673    iri: &str,
674    import_paths: &std::collections::HashMap<String, Url>,
675) -> Option<Url> {
676    for (prefix_iri, local_dir) in import_paths {
677        if iri.starts_with(prefix_iri.as_str()) {
678            let suffix = &iri[prefix_iri.len()..];
679            return local_dir.join(suffix).ok();
680        }
681    }
682    // If the IRI is already a file:// URL, parse it directly.
683    if iri.starts_with("file://") {
684        return Url::parse(iri).ok();
685    }
686    None
687}