plotnik_compiler/analyze/
link.rs

1//! Link pass: resolve node types and fields against tree-sitter grammar.
2//!
3//! Two-phase approach:
4//! 1. Resolve all symbols (node types and fields) against grammar
5//! 2. Validate structural constraints (field on node type, child type for field)
6
7use std::collections::HashMap;
8
9use indexmap::{IndexMap, IndexSet};
10use plotnik_core::{Interner, NodeFieldId, NodeTypeId, Symbol};
11use plotnik_langs::Lang;
12use rowan::TextRange;
13
14/// Output from the link phase for binary emission.
15#[derive(Default)]
16pub struct LinkOutput {
17    /// Interned name → NodeTypeId (for binary: StringId → NodeTypeId)
18    pub node_type_ids: IndexMap<Symbol, NodeTypeId>,
19    /// Interned name → NodeFieldId (for binary: StringId → NodeFieldId)
20    pub node_field_ids: IndexMap<Symbol, NodeFieldId>,
21}
22
23use super::symbol_table::SymbolTable;
24use super::utils::find_similar;
25use super::visitor::{Visitor, walk};
26use crate::diagnostics::{DiagnosticKind, Diagnostics};
27use crate::parser::ast::{self, Expr, NamedNode};
28use crate::parser::{SyntaxKind, SyntaxToken, token_src};
29use crate::query::{AstMap, SourceId, SourceMap};
30
31/// Link query against a language grammar.
32///
33/// This function is decoupled from `Query` to allow easier testing and
34/// modularity. It orchestrates the resolution and validation phases.
35pub fn link<'q>(
36    interner: &mut Interner,
37    lang: &Lang,
38    source_map: &'q SourceMap,
39    ast_map: &AstMap,
40    symbol_table: &SymbolTable,
41    output: &mut LinkOutput,
42    diagnostics: &mut Diagnostics,
43) {
44    // Local deduplication maps (not exposed in output)
45    let mut node_type_ids: HashMap<&'q str, Option<NodeTypeId>> = HashMap::new();
46    let mut node_field_ids: HashMap<&'q str, Option<NodeFieldId>> = HashMap::new();
47
48    for (&source_id, root) in ast_map {
49        let mut linker = Linker {
50            interner,
51            lang,
52            source_map,
53            symbol_table,
54            source_id,
55            node_type_ids: &mut node_type_ids,
56            node_field_ids: &mut node_field_ids,
57            output,
58            diagnostics,
59        };
60        linker.link(root);
61    }
62}
63
64struct Linker<'a, 'q> {
65    // Refs
66    interner: &'a mut Interner,
67    lang: &'a Lang,
68    source_map: &'q SourceMap,
69    symbol_table: &'a SymbolTable,
70    source_id: SourceId,
71    node_type_ids: &'a mut HashMap<&'q str, Option<NodeTypeId>>,
72    node_field_ids: &'a mut HashMap<&'q str, Option<NodeFieldId>>,
73    output: &'a mut LinkOutput,
74    diagnostics: &'a mut Diagnostics,
75}
76
77impl<'a, 'q> Linker<'a, 'q> {
78    fn source(&self) -> &'q str {
79        self.source_map.content(self.source_id)
80    }
81
82    fn link(&mut self, root: &ast::Root) {
83        self.resolve_symbols(root);
84        self.validate_structure(root);
85    }
86
87    fn resolve_symbols(&mut self, root: &ast::Root) {
88        let mut resolver = SymbolResolver { linker: self };
89        resolver.visit(root);
90    }
91
92    fn resolve_named_node(&mut self, node: &NamedNode) {
93        if node.is_any() {
94            return;
95        }
96        let Some(type_token) = node.node_type() else {
97            return;
98        };
99        if matches!(
100            type_token.kind(),
101            SyntaxKind::KwError | SyntaxKind::KwMissing
102        ) {
103            return;
104        }
105        let type_name = type_token.text();
106        if self.node_type_ids.contains_key(type_name) {
107            return;
108        }
109        let resolved = self.lang.resolve_named_node(type_name);
110        self.node_type_ids
111            .insert(token_src(&type_token, self.source()), resolved);
112        if let Some(id) = resolved {
113            let sym = self.interner.intern(type_name);
114            self.output.node_type_ids.entry(sym).or_insert(id);
115        }
116        if resolved.is_none() {
117            let all_types = self.lang.all_named_node_kinds();
118            let max_dist = (type_name.len() / 3).clamp(2, 4);
119            let suggestion = find_similar(type_name, &all_types, max_dist);
120
121            let mut builder = self
122                .diagnostics
123                .report(
124                    self.source_id,
125                    DiagnosticKind::UnknownNodeType,
126                    type_token.text_range(),
127                )
128                .message(type_name);
129
130            if let Some(similar) = suggestion {
131                builder = builder.hint(format!("did you mean `{}`?", similar));
132            }
133            builder.emit();
134        }
135    }
136
137    fn resolve_field_by_token(&mut self, name_token: Option<SyntaxToken>) {
138        let Some(name_token) = name_token else {
139            return;
140        };
141        let field_name = name_token.text();
142        if self.node_field_ids.contains_key(field_name) {
143            return;
144        }
145        let resolved = self.lang.resolve_field(field_name);
146        self.node_field_ids
147            .insert(token_src(&name_token, self.source()), resolved);
148        if let Some(id) = resolved {
149            let sym = self.interner.intern(field_name);
150            self.output.node_field_ids.entry(sym).or_insert(id);
151            return;
152        }
153        let all_fields = self.lang.all_field_names();
154        let max_dist = (field_name.len() / 3).clamp(2, 4);
155        let suggestion = find_similar(field_name, &all_fields, max_dist);
156
157        let mut builder = self
158            .diagnostics
159            .report(
160                self.source_id,
161                DiagnosticKind::UnknownField,
162                name_token.text_range(),
163            )
164            .message(field_name);
165
166        if let Some(similar) = suggestion {
167            builder = builder.hint(format!("did you mean `{}`?", similar));
168        }
169        builder.emit();
170    }
171
172    fn validate_structure(&mut self, root: &ast::Root) {
173        let defs: Vec<_> = root.defs().collect();
174        for def in defs {
175            let Some(body) = def.body() else { continue };
176            let mut visited = IndexSet::new();
177            self.validate_expr_structure(&body, None, &mut visited);
178        }
179    }
180
181    fn validate_expr_structure(
182        &mut self,
183        expr: &Expr,
184        ctx: Option<ValidationContext>,
185        visited: &mut IndexSet<String>,
186    ) {
187        match expr {
188            Expr::NamedNode(node) => {
189                let child_ctx = self.make_node_context(node);
190
191                // Predicates are only valid on leaf nodes (grammar check)
192                if let Some(pred) = node.predicate()
193                    && let Some(ctx) = &child_ctx
194                    && (!self.lang.valid_child_types(ctx.parent_id).is_empty()
195                        || !self.lang.fields_for_node_type(ctx.parent_id).is_empty())
196                {
197                    self.diagnostics
198                        .report(
199                            self.source_id,
200                            DiagnosticKind::PredicateOnNonLeaf,
201                            pred.as_cst().text_range(),
202                        )
203                        .emit();
204                }
205
206                for child in node.children() {
207                    if let Expr::FieldExpr(f) = &child {
208                        self.validate_field_expr(f, child_ctx.as_ref(), visited);
209                    } else {
210                        self.validate_expr_structure(&child, child_ctx, visited);
211                    }
212                }
213
214                if let Some(ctx) = child_ctx {
215                    for child in node.as_cst().children() {
216                        if let Some(neg) = ast::NegatedField::cast(child) {
217                            self.validate_negated_field(&neg, &ctx);
218                        }
219                    }
220                }
221            }
222            Expr::AnonymousNode(_) => {}
223            Expr::FieldExpr(f) => {
224                // Should be handled by parent NamedNode, but handle gracefully
225                self.validate_field_expr(f, ctx.as_ref(), visited);
226            }
227            Expr::AltExpr(alt) => {
228                for branch in alt.branches() {
229                    let Some(body) = branch.body() else { continue };
230                    self.validate_expr_structure(&body, ctx, visited);
231                }
232            }
233            Expr::SeqExpr(seq) => {
234                for child in seq.children() {
235                    self.validate_expr_structure(&child, ctx, visited);
236                }
237            }
238            Expr::CapturedExpr(cap) => {
239                let Some(inner) = cap.inner() else { return };
240                self.validate_expr_structure(&inner, ctx, visited);
241            }
242            Expr::QuantifiedExpr(q) => {
243                let Some(inner) = q.inner() else { return };
244                self.validate_expr_structure(&inner, ctx, visited);
245            }
246            Expr::Ref(r) => {
247                let Some(name_token) = r.name() else { return };
248                let name = name_token.text();
249                if !visited.insert(name.to_string()) {
250                    return;
251                }
252                let Some(body) = self.symbol_table.get(name).cloned() else {
253                    visited.swap_remove(name);
254                    return;
255                };
256                self.validate_expr_structure(&body, ctx, visited);
257                visited.swap_remove(name);
258            }
259        }
260    }
261
262    /// Create validation context for a named node's children.
263    fn make_node_context(&self, node: &NamedNode) -> Option<ValidationContext> {
264        if node.is_any() {
265            return None;
266        }
267        let type_token = node.node_type()?;
268        if matches!(
269            type_token.kind(),
270            SyntaxKind::KwError | SyntaxKind::KwMissing
271        ) {
272            return None;
273        }
274        let type_name = type_token.text();
275        let parent_id = self.node_type_ids.get(type_name).copied().flatten()?;
276        // Verify the node type exists in the grammar
277        self.lang.node_type_name(parent_id)?;
278        Some(ValidationContext {
279            parent_id,
280            parent_range: type_token.text_range(),
281        })
282    }
283
284    fn validate_field_expr(
285        &mut self,
286        field: &ast::FieldExpr,
287        ctx: Option<&ValidationContext>,
288        visited: &mut IndexSet<String>,
289    ) {
290        let Some(name_token) = field.name() else {
291            return;
292        };
293        let Some(field_id) = self
294            .node_field_ids
295            .get(name_token.text())
296            .copied()
297            .flatten()
298        else {
299            return;
300        };
301        let Some(ctx) = ctx else { return };
302
303        if !self.lang.has_field(ctx.parent_id, field_id) {
304            self.emit_field_not_on_node(
305                name_token.text_range(),
306                name_token.text(),
307                ctx.parent_id,
308                ctx.parent_range,
309            );
310            return;
311        }
312
313        let Some(value) = field.value() else { return };
314        self.validate_expr_structure(&value, Some(*ctx), visited);
315    }
316
317    fn validate_negated_field(&mut self, neg: &ast::NegatedField, ctx: &ValidationContext) {
318        let Some(name_token) = neg.name() else {
319            return;
320        };
321        let field_name = name_token.text();
322
323        let Some(field_id) = self.node_field_ids.get(field_name).copied().flatten() else {
324            return;
325        };
326
327        if self.lang.has_field(ctx.parent_id, field_id) {
328            return;
329        }
330        self.emit_field_not_on_node(
331            name_token.text_range(),
332            field_name,
333            ctx.parent_id,
334            ctx.parent_range,
335        );
336    }
337
338    fn emit_field_not_on_node(
339        &mut self,
340        range: TextRange,
341        field_name: &str,
342        parent_id: NodeTypeId,
343        parent_range: TextRange,
344    ) {
345        let valid_fields = self.lang.fields_for_node_type(parent_id);
346        let parent_name = self
347            .lang
348            .node_type_name(parent_id)
349            .expect("validated parent_id must have a name");
350
351        let mut builder = self
352            .diagnostics
353            .report(self.source_id, DiagnosticKind::FieldNotOnNodeType, range)
354            .message(field_name)
355            .related_to(
356                self.source_id,
357                parent_range,
358                format!("on `{}`", parent_name),
359            );
360
361        if valid_fields.is_empty() {
362            builder = builder.hint(format!("`{}` has no fields", parent_name));
363        } else {
364            let max_dist = (field_name.len() / 3).clamp(2, 4);
365            if let Some(similar) = find_similar(field_name, &valid_fields, max_dist) {
366                builder = builder.hint(format!("did you mean `{}`?", similar));
367            }
368            builder = builder.hint(format!(
369                "valid fields for `{}`: {}",
370                parent_name,
371                format_list(&valid_fields, 5)
372            ));
373        }
374        builder.emit();
375    }
376}
377
/// Format a list of items for display, truncating if too long.
///
/// Every shown item is wrapped in backticks and items are separated by `", "`.
/// At most `max_items` items are shown; when the list is longer, the rest is
/// summarised as `... (N more)`. An empty list yields an empty string, and a
/// `max_items` of zero yields only the summary, without a leading separator.
fn format_list(items: &[&str], max_items: usize) -> String {
    let shown = items
        .iter()
        .take(max_items)
        .map(|item| format!("`{}`", item))
        .collect::<Vec<_>>()
        .join(", ");

    let hidden = items.len().saturating_sub(max_items);
    if hidden == 0 {
        return shown;
    }
    if shown.is_empty() {
        // Nothing precedes the summary, so there is nothing to separate it from.
        return format!("... ({} more)", hidden);
    }
    format!("{}, ... ({} more)", shown, hidden)
}
401
402/// Context for validating child types.
403#[derive(Clone, Copy)]
404struct ValidationContext {
405    /// The parent node type being validated against.
406    parent_id: NodeTypeId,
407    /// The parent node type token range for related_to.
408    parent_range: TextRange,
409}
410
411/// Combined symbol resolver for node types and fields.
412struct SymbolResolver<'l, 'a, 'q> {
413    linker: &'l mut Linker<'a, 'q>,
414}
415
416impl Visitor for SymbolResolver<'_, '_, '_> {
417    fn visit(&mut self, root: &ast::Root) {
418        walk(self, root);
419    }
420
421    fn visit_named_node(&mut self, node: &ast::NamedNode) {
422        self.linker.resolve_named_node(node);
423
424        for neg in node.as_cst().children().filter_map(ast::NegatedField::cast) {
425            self.linker.resolve_field_by_token(neg.name());
426        }
427
428        super::visitor::walk_named_node(self, node);
429    }
430
431    fn visit_anonymous_node(&mut self, node: &ast::AnonymousNode) {
432        if node.is_any() {
433            return;
434        }
435        let Some(value_token) = node.value() else {
436            return;
437        };
438        let value = value_token.text();
439        if self.linker.node_type_ids.contains_key(value) {
440            return;
441        }
442
443        let resolved = self.linker.lang.resolve_anonymous_node(value);
444        self.linker
445            .node_type_ids
446            .insert(token_src(&value_token, self.linker.source()), resolved);
447
448        if let Some(id) = resolved {
449            let sym = self.linker.interner.intern(value);
450            self.linker.output.node_type_ids.entry(sym).or_insert(id);
451            return;
452        }
453
454        self.linker
455            .diagnostics
456            .report(
457                self.linker.source_id,
458                DiagnosticKind::UnknownNodeType,
459                value_token.text_range(),
460            )
461            .message(value)
462            .emit();
463    }
464
465    fn visit_field_expr(&mut self, field: &ast::FieldExpr) {
466        self.linker.resolve_field_by_token(field.name());
467        super::visitor::walk_field_expr(self, field);
468    }
469}