Skip to main content

swls_lang_jsonld/
lib.rs

1#![doc(
2    html_logo_url = "https://ajuvercr.github.io/semantic-web-lsp/assets/icons/favicon.png",
3    html_favicon_url = "https://ajuvercr.github.io/semantic-web-lsp/assets/icons/favicon.ico"
4)]
5pub mod cjs;
6
7use std::{borrow::Cow, ops::Range};
8
9use bevy_ecs::{
10    component::Component,
11    observer::On,
12    query::With,
13    resource::Resource,
14    schedule::IntoScheduleConfigs,
15    system::{Commands, Query, Res, RunSystemOnce},
16    world::{CommandQueue, World},
17};
18use components_rs::{
19    components::registry::{resolve_iri_to_url, ComponentRegistry},
20    module_state::ModuleState,
21};
22use oxigraph::model::{GraphName, Literal, NamedNode, Quad};
23use swls_core::{
24    lang::{Lang, LangHelper},
25    lsp_types::{SemanticTokenType, Url},
26    prelude::{goto_definition::GotoDefinitionRequest, *},
27    util::resolve_iri,
28    Started,
29};
30use swls_lang_rdf_base::register_rdf_lang;
31use swls_lang_turtle::lang::parser::TurtleParseError;
32
33pub mod ecs;
34use crate::{
35    ecs::{
36        derive_jsonld_triples, format_jsonld_system, setup_completion, setup_parsing, ContextCache,
37        JsonLdActiveContext,
38    },
39    fs::build_registry,
40};
41
42#[derive(Component, Default)]
43pub struct JsonLdLang;
44
/// Stateless helper type carrying the JSON-LD specific `LangHelper` behaviour.
///
/// The type has no fields, so it is cheap to copy and can be created with
/// `JsonLdHelper::default()` or simply `JsonLdHelper`.
#[derive(Debug, Default, Clone, Copy)]
pub struct JsonLdHelper;
47
48impl LangHelper for JsonLdHelper {
49    fn keyword(&self) -> &[&'static str] {
50        &[
51            "@context",
52            "@id",
53            "@type",
54            "@graph",
55            "@base",
56            "@vocab",
57            "@language",
58            "@value",
59            "@list",
60            "@set",
61            "@reverse",
62            "@index",
63            "@container",
64        ]
65    }
66
67    fn default_position(&self) -> TripleTarget {
68        TripleTarget::Predicate
69    }
70
71    fn unquote<'a>(&self, text: &'a str) -> &'a str {
72        let s = text.strip_prefix('"').unwrap_or(text);
73        s.strip_suffix('"').unwrap_or(s)
74    }
75    fn quote(&self, inp: &str) -> String {
76        format!("\"{}\"", inp)
77    }
78    fn handles_prefix_completion(&self) -> bool {
79        true
80    }
81
82    fn inlay_types_hint(
83        &self,
84        subject: &Range<usize>,
85        rope: &ropey::Rope,
86        last_type: Option<&Range<usize>>,
87        types: Vec<Cow<'_, str>>,
88    ) -> Option<swls_core::lsp_types::InlayHint> {
89        let (label, position) = if let Some(lt) = last_type {
90            if let Some(pos) = offset_to_position(lt.end, &rope) {
91                let label = format!(", {}", types.join(", "));
92                (label, pos)
93            } else {
94                return None;
95            }
96        } else {
97            let offset = if rope.get_char(subject.start) == Some('[') {
98                subject.start + 1
99            } else {
100                subject.end
101            };
102
103            if let Some(pos) = offset_to_position(offset + 1, &rope) {
104                let label = if types.len() == 1 {
105                    format!(r#" "@type": "{}";"#, types[0])
106                } else {
107                    format!(
108                        r#" "@type": [ {} ],"#,
109                        types
110                            .into_iter()
111                            .map(|x| format!("\"{}\"", x))
112                            .collect::<Vec<_>>()
113                            .join(", ")
114                    )
115                };
116                (label, pos)
117            } else {
118                return None;
119            }
120        };
121
122        return Some(swls_core::lsp_types::InlayHint {
123            position,
124            label: swls_core::lsp_types::InlayHintLabel::String(label),
125            kind: None,
126            text_edits: None,
127            tooltip: None,
128            padding_left: None,
129            padding_right: None,
130            data: None,
131        });
132    }
133}
134
135pub fn setup_world<C: Client + ClientSync + Resource + Clone>(world: &mut World) {
136    register_rdf_lang::<JsonLdLang, JsonLdHelper>(world, &["jsonld"], &[".jsonld"]);
137
138    // For .json files and the "json" language ID, only activate JSON-LD when the
139    // source actually contains "@context" — plain JSON files should be left alone.
140    world.add_observer(
141        |trigger: On<CreateEvent>, mut commands: Commands, query: Query<&Source>| {
142            let e = trigger.event();
143            let is_json = trigger
144                .language_id
145                .as_ref()
146                .map(|l| l == "json")
147                .unwrap_or_default()
148                || e.url.as_str().ends_with(".json");
149            if !is_json {
150                return;
151            }
152            let entity = e.entity;
153            if let Ok(source) = query.get(entity) {
154                if source.0.contains("\"@context\"") {
155                    commands
156                        .entity(entity)
157                        .insert(JsonLdLang::default())
158                        .insert(DynLang(Box::new(JsonLdHelper::default())));
159                }
160            }
161        },
162    );
163    world.insert_resource(ContextCache::default());
164    world.insert_resource(Registry::empty());
165    setup_parsing::<C>(world);
166    setup_completion(world);
167
168    world.schedule_scope(FormatLabel, |_, schedule| {
169        schedule.add_systems(format_jsonld_system);
170    });
171
172    world.schedule_scope(Started, |_, schedule| {
173        schedule.add_systems((start_jsonld::<C>,));
174    });
175
176    world.schedule_scope(GotoDefinitionLabel, |_, schedule| {
177        schedule.add_systems(goto_cjs.after(get_current_triple));
178    });
179}
180
181/// Convert a byte offset to an LSP `Position` (line + character).
182fn byte_offset_to_position(source: &str, offset: usize) -> swls_core::lsp_types::Position {
183    let offset = offset.min(source.len());
184    let before = &source[..offset];
185    let line = before.matches('\n').count() as u32;
186    let col = before.rfind('\n').map(|p| offset - p - 1).unwrap_or(offset) as u32;
187    swls_core::lsp_types::Position::new(line, col)
188}
189
190/// Convert a byte-range span to an LSP `Range` using the file's source text.
191fn span_to_lsp_range(source: &str, span: &std::ops::Range<usize>) -> swls_core::lsp_types::Range {
192    swls_core::lsp_types::Range::new(
193        byte_offset_to_position(source, span.start),
194        byte_offset_to_position(source, span.end),
195    )
196}
197
198/// Expand a JSON-LD compact IRI or bare term name using the document's active context.
199///
200/// Handles chains like `css:dist/...` → `npmd:@solid/...dist/...` → `https://...dist/...`
201/// and bare term names like `BearerWebIdExtractor` → `css:dist/...BearerWebIdExtractor`.
202fn expand_iri_with_context(
203    active: &rdf_parsers::jsonld::convert::ActiveContext,
204    value: &str,
205) -> String {
206    expand_iri_inner(active, value, 0)
207}
208
209fn expand_iri_inner(
210    active: &rdf_parsers::jsonld::convert::ActiveContext,
211    value: &str,
212    depth: usize,
213) -> String {
214    if depth > 10 || value.is_empty() || value.starts_with('@') {
215        return value.to_string();
216    }
217    // Already absolute — well-known schemes only, so we don't mistake `css:` for absolute.
218    if value.starts_with("https://")
219        || value.starts_with("http://")
220        || value.starts_with("file://")
221        || value.starts_with("urn:")
222    {
223        return value.to_string();
224    }
225    // Bare term lookup (e.g. "BearerWebIdExtractor")
226    if let Some(def) = active.terms.get(value) {
227        if let Some(iri) = &def.iri {
228            if iri != value {
229                return expand_iri_inner(active, iri, depth + 1);
230            }
231        }
232    }
233    // Compact IRI like "prefix:suffix"
234    if let Some(colon_pos) = value.find(':') {
235        if colon_pos > 0 {
236            let prefix = &value[..colon_pos];
237            let suffix = &value[colon_pos + 1..];
238            if let Some(def) = active.terms.get(prefix) {
239                if let Some(iri) = &def.iri {
240                    let expanded_prefix = expand_iri_inner(active, iri, depth + 1);
241                    return format!("{}{}", expanded_prefix, suffix);
242                }
243            }
244        }
245    }
246    value.to_string()
247}
248
249#[tracing::instrument(skip(query, res))]
250fn goto_cjs(
251    mut query: Query<
252        (
253            &TokenComponent,
254            Option<&TripleComponent>,
255            &Label,
256            &mut GotoDefinitionRequest,
257            Option<&JsonLdActiveContext>,
258        ),
259        With<JsonLdLang>,
260    >,
261    res: Res<Registry>,
262) {
263    use swls_core::lsp_types::{Location, Range};
264
265    for (token, triple, label, mut req, active_ctx) in &mut query {
266        // Only use the expanded IRI from the TripleComponent if the cursor token
267        // actually overlaps the matched term's span.  get_current_triple is lenient
268        // and may fall back to a nearby triple (e.g. the first triple in the
269        // document) when the cursor is on @context or other non-triple content.
270        let triple_term_str = triple.and_then(|tc| {
271            let term_span = match tc.target {
272                TripleTarget::Subject => &tc.triple.subject.span,
273                TripleTarget::Predicate => &tc.triple.predicate.span,
274                TripleTarget::Object => &tc.triple.object.span,
275                TripleTarget::Graph => return None,
276            };
277            let cursor = token.source_span.start;
278            if term_span.start <= cursor && cursor <= term_span.end {
279                tc.term().map(|t| t.as_str())
280            } else {
281                None
282            }
283        });
284        let raw_token = token.text.as_str().trim_matches('"');
285        // When no triple term is available (e.g. cursor in @context), expand compact
286        // IRIs like `css:dist/...` using the document's active context so that
287        // resolve_iri_to_url can match them against import_paths.
288        let context_expanded = if triple_term_str.is_none() {
289            active_ctx.map(|ctx| expand_iri_with_context(&ctx.0, raw_token))
290        } else {
291            None
292        };
293        let st: &str = triple_term_str
294            .as_deref()
295            .or(context_expanded.as_deref())
296            .unwrap_or(raw_token);
297
298        tracing::debug!("Goto definition {:?} {}", triple_term_str, st,);
299
300        // Components: navigate to the component's own source file at the exact @id span.
301        let found_target = if let Some(component) = res.0.components.get(st) {
302            Some((component.source_file.as_str(), component.iri_span.clone()))
303        } else if let Some(module) = res.0.modules.get(st) {
304            Some((module.source_file.as_str(), module.iri_span.clone()))
305        } else if let Some((file, span)) = res.0.parameters.get(st) {
306            Some((file.as_str(), span.clone()))
307        } else {
308            None
309        };
310
311        tracing::debug!(
312            "CJS from {:?} {:?}",
313            found_target,
314            resolve_iri_to_url(st, &res.1.import_paths),
315        );
316        if let Some((file, span)) = found_target {
317            if let Ok(uri) = swls_core::lsp_types::Url::parse(file) {
318                let range = res
319                    .0
320                    .file_sources
321                    .get(file)
322                    .map(|src| span_to_lsp_range(src, &span))
323                    .unwrap_or_default();
324                req.0.push(Location { uri, range });
325                continue;
326            }
327        }
328
329        let iri_no_fragment = st.split('#').next().unwrap_or(st);
330        let resolved = resolve_iri_to_url(iri_no_fragment, &res.1.import_paths)
331            .or_else(|| res.1.context_urls.get(iri_no_fragment).cloned());
332        if let Some(t) = resolved {
333            tracing::debug!("target {}", t.as_str());
334            req.0.push(Location {
335                uri: t,
336                range: Range::default(),
337            });
338            continue;
339        }
340
341        if triple_term_str.is_none() {
342            // Import IRIs: strip any fragment, then resolve to a local file path.
343            let target = resolve_iri(&label.as_str(), st);
344            if let Ok(uri) = swls_core::lsp_types::Url::parse(&target) {
345                req.0.push(Location {
346                    uri,
347                    range: Range::default(),
348                });
349                continue;
350            }
351        }
352
353        tracing::debug!("goto_cjs: no definition found for '{}'", st);
354    }
355}
356
357mod fs {
358    use components_rs::{
359        error::{ComponentsJsError, Result},
360        fs::FsDirEntry,
361    };
362    use swls_core::{lsp_types::Url, prelude::Fs};
363
364    use crate::Registry;
365
366    pub struct LocalFs(Fs);
367
368    #[async_trait::async_trait]
369    impl components_rs::fs::Fs for LocalFs {
370        /// Read the entire contents of a file as a UTF-8 string.
371        async fn read_to_string(&self, url: &Url) -> Result<String> {
372            self.0
373                 .0
374                .read_file(&url)
375                .await
376                .ok_or(ComponentsJsError::General(format!(
377                    "Failed to read file {}",
378                    url.as_str()
379                )))
380        }
381
382        /// List the immediate children of a directory.
383        async fn read_dir(&self, path: &Url) -> Result<Vec<components_rs::fs::FsDirEntry>> {
384            let entries = self
385                .0
386                 .0
387                .read_dir(path)
388                .await
389                .ok_or(ComponentsJsError::General(format!(
390                    "Failed to read dir {:?}",
391                    path.as_str()
392                )))?;
393            Ok(entries
394                .into_iter()
395                .map(|entry| components_rs::fs::FsDirEntry {
396                    name: entry.name,
397                    path: entry.path,
398                    is_dir: entry.is_dir,
399                })
400                .collect())
401        }
402
403        /// Check whether `path` is a file.
404        async fn is_file(&self, path: &Url) -> bool {
405            self.0 .0.is_file(path).await
406        }
407
408        /// Check whether `path` is a directory.
409        async fn is_dir(&self, path: &Url) -> bool {
410            self.0 .0.is_dir(path).await
411        }
412
413        async fn glob(&self, base: &Url, pattern: &str) -> Result<Vec<FsDirEntry>> {
414            let entries = self
415                .0
416                 .0
417                .glob(base, pattern)
418                .await
419                .ok_or(ComponentsJsError::General(format!(
420                    "Failed to read dir {:?} {}",
421                    base.as_str(),
422                    pattern
423                )))?;
424            Ok(entries
425                .into_iter()
426                .map(|entry| components_rs::fs::FsDirEntry {
427                    name: entry.name,
428                    path: entry.path,
429                    is_dir: entry.is_dir,
430                })
431                .collect())
432        }
433    }
434
435    pub async fn build_registry(fs: &Fs, path: &Url) -> Result<Registry> {
436        use components_rs::components::registry::ComponentRegistry;
437        use components_rs::module_state::ModuleState;
438
439        let fs = LocalFs(fs.clone());
440        let state = ModuleState::build(&fs, path).await?;
441
442        let mut registry = ComponentRegistry::new();
443        registry.register_available_modules(&fs, &state).await?;
444        registry.finalize();
445
446        Ok(Registry(registry, state))
447    }
448}
449
450fn build_cjs_quads(registry: &ComponentRegistry) -> Vec<Quad> {
451    let rdf_type = NamedNode::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#type");
452    let rdfs_class = NamedNode::new_unchecked("http://www.w3.org/2000/01/rdf-schema#Class");
453    let rdfs_subclass_of =
454        NamedNode::new_unchecked("http://www.w3.org/2000/01/rdf-schema#subClassOf");
455    let rdfs_comment = NamedNode::new_unchecked("http://www.w3.org/2000/01/rdf-schema#comment");
456    let rdf_property =
457        NamedNode::new_unchecked("http://www.w3.org/1999/02/22-rdf-syntax-ns#Property");
458    let rdfs_domain = NamedNode::new_unchecked("http://www.w3.org/2000/01/rdf-schema#domain");
459    let rdfs_range = NamedNode::new_unchecked("http://www.w3.org/2000/01/rdf-schema#range");
460
461    let graph = GraphName::DefaultGraph;
462    let mut quads = Vec::new();
463
464    for comp in registry.components.values() {
465        let iri = NamedNode::new_unchecked(&comp.iri);
466
467        quads.push(Quad::new(
468            iri.clone(),
469            rdf_type.clone(),
470            rdfs_class.clone(),
471            graph.clone(),
472        ));
473
474        if let Some(comment) = &comp.comment {
475            quads.push(Quad::new(
476                iri.clone(),
477                rdfs_comment.clone(),
478                Literal::new_simple_literal(comment.as_str()),
479                graph.clone(),
480            ));
481        }
482
483        for parent in &comp.extends {
484            let parent_node = NamedNode::new_unchecked(parent);
485            quads.push(Quad::new(
486                iri.clone(),
487                rdfs_subclass_of.clone(),
488                parent_node,
489                graph.clone(),
490            ));
491        }
492
493        for param in &comp.parameters {
494            let param_iri = NamedNode::new_unchecked(&param.iri);
495
496            quads.push(Quad::new(
497                param_iri.clone(),
498                rdf_type.clone(),
499                rdf_property.clone(),
500                graph.clone(),
501            ));
502            quads.push(Quad::new(
503                param_iri.clone(),
504                rdfs_domain.clone(),
505                iri.clone(),
506                graph.clone(),
507            ));
508
509            if let Some(range) = &param.range {
510                let range_node = NamedNode::new_unchecked(range);
511                quads.push(Quad::new(
512                    param_iri.clone(),
513                    rdfs_range.clone(),
514                    range_node,
515                    graph.clone(),
516                ));
517            }
518
519            if let Some(comment) = &param.comment {
520                quads.push(Quad::new(
521                    param_iri.clone(),
522                    rdfs_comment.clone(),
523                    Literal::new_simple_literal(comment.as_str()),
524                    graph.clone(),
525                ));
526            }
527        }
528    }
529
530    quads
531}
532
533#[derive(Resource)]
534pub struct Registry(pub ComponentRegistry, pub ModuleState);
535impl Registry {
536    pub fn empty() -> Self {
537        Self(ComponentRegistry::new(), ModuleState::empty())
538    }
539}
540
541#[tracing::instrument(skip(fs, client, config, commands))]
542fn start_jsonld<C: Client + Resource + Clone>(
543    fs: Res<Fs>,
544    client: Res<C>,
545    config: Res<ServerConfig>,
546    commands: Res<CommandSender>,
547) {
548    if !config.config.jsonld.unwrap_or(true) {
549        return;
550    }
551    let fs = fs.clone();
552    tracing::debug!("loading CJS registry, config: {:?}", config);
553    if let Some(ws) = config.workspaces.first().and_then(|x| {
554        if x.uri.as_str().ends_with('/') {
555            Some(x.uri.clone())
556        } else {
557            Url::parse(&format!("{}/", x.uri.as_str())).ok()
558        }
559    }) {
560        tracing::debug!("CJS workspace root: {:?}", ws.as_str());
561        let commands = commands.clone();
562        let thing = async move {
563            tracing::debug!("Starting CJS registry build for {:?}", ws.as_str());
564            if let Ok(reg) = build_registry(&fs, &ws).await {
565                let mut command_queue = CommandQueue::default();
566                command_queue.push(move |world: &mut World| {
567                    let quads = build_cjs_quads(&reg.0);
568                    world.insert_resource(reg);
569
570                    let store_clone = world.get_resource::<swls_core::store::Store>();
571                    //
572                    if let Some(store) = store_clone {
573                        tracing::debug!("Derive store found adding {} triples", quads.len());
574                        let mut loader = store.0.bulk_loader();
575                        let _ = loader.load_quads(quads.into_iter());
576                        let _ = loader.commit();
577                    }
578
579                    let _ = world.run_system_once(derive_jsonld_triples::<C>);
580                });
581                let _ = commands.unbounded_send(command_queue);
582            }
583            ()
584        };
585        client.spawn(thing);
586    } else {
587        tracing::warn!("No workspace root found, skipping CJS registry build");
588    }
589}
590
591impl Lang for JsonLdLang {
592    type Element = rdf_parsers::model::Turtle;
593    type ElementError = TurtleParseError;
594
595    const LANG: &'static str = "jsonld";
596    const TRIGGERS: &'static [&'static str] = &["\"@", "\""];
597    const CODE_ACTION: bool = false;
598    const HOVER: bool = true;
599    const PATTERN: Option<&'static str> = None;
600
601    const LEGEND_TYPES: &'static [SemanticTokenType] = &[
602        semantic_token::BOOLEAN,
603        SemanticTokenType::COMMENT,
604        SemanticTokenType::ENUM_MEMBER,
605        SemanticTokenType::KEYWORD,
606        SemanticTokenType::NAMESPACE,
607        SemanticTokenType::NUMBER,
608        SemanticTokenType::PROPERTY,
609        SemanticTokenType::STRING,
610    ];
611
612    fn semantic_token_type(kind: rowan::SyntaxKind) -> Option<SemanticTokenType> {
613        use rdf_parsers::jsonld::parser::SyntaxKind as SK;
614        let k = kind.0;
615        if k == SK::Comment as u16 {
616            Some(SemanticTokenType::COMMENT)
617        } else if k == SK::StringToken as u16 {
618            Some(SemanticTokenType::STRING)
619        } else if k == SK::JsonNumber as u16 {
620            Some(SemanticTokenType::NUMBER)
621        } else if k == SK::TrueLit as u16 || k == SK::FalseLit as u16 || k == SK::NullLit as u16 {
622            Some(semantic_token::BOOLEAN)
623        } else {
624            None
625        }
626    }
627
628    fn semantic_token_spans(
629        kind: rowan::SyntaxKind,
630        span: std::ops::Range<usize>,
631        text: &str,
632    ) -> Vec<(SemanticTokenType, std::ops::Range<usize>)> {
633        if text.get(span.start + 1..span.start + 2) == Some("@") {
634            return vec![(SemanticTokenType::KEYWORD, span)];
635        }
636        if text.get(span.end..span.end + 1) == Some(":") {
637            return vec![(SemanticTokenType::NAMESPACE, span)];
638        }
639        Self::semantic_token_type(kind)
640            .map(|t| vec![(t, span)])
641            .unwrap_or_default()
642    }
643}