Skip to main content

shape_lsp/
analysis.rs

1//! Shared semantic analysis pipeline for Shape LSP.
2//!
3//! This module centralizes semantic diagnostics so the language server and
4//! the `shape-test` fluent harness use the same logic.
5
6use crate::annotation_discovery::AnnotationDiscovery;
7use crate::diagnostics::{
8    error_to_diagnostic, validate_annotations, validate_async_join,
9    validate_async_structured_concurrency, validate_comptime_builtins_context,
10    validate_comptime_overrides, validate_comptime_side_effects, validate_content_strings,
11    validate_foreign_function_types, validate_interpolation_format_specs, validate_trait_bounds,
12};
13use crate::module_cache::ModuleCache;
14use crate::scope::ScopeTree;
15use crate::util::offset_to_line_col;
16use shape_ast::ast::{Expr, ImportItems, Item, Program};
17use shape_runtime::visitor::{Visitor, walk_program};
18use std::collections::{HashMap, HashSet};
19use tower_lsp_server::ls_types::{Diagnostic, DiagnosticSeverity, Position, Range};
20
/// Upper bound on how many diagnostics a single semantic analysis pass returns;
/// anything beyond this count is dropped after de-duplication.
const MAX_SEMANTIC_DIAGNOSTICS: usize = 200;
22
23/// Run semantic diagnostics for a parsed Shape program.
24pub fn analyze_program_semantics(
25    program: &Program,
26    text: &str,
27    file_path: Option<&std::path::Path>,
28    module_cache: Option<&ModuleCache>,
29    workspace_root: Option<&std::path::Path>,
30) -> Vec<Diagnostic> {
31    let mut diagnostics = Vec::new();
32
33    // Discover annotations from the program and imports.
34    let mut annotation_discovery = AnnotationDiscovery::new();
35    annotation_discovery.discover_from_program(program);
36    if let (Some(path), Some(cache)) = (file_path, module_cache) {
37        annotation_discovery.discover_from_imports_with_cache(program, path, cache, workspace_root);
38    } else {
39        annotation_discovery.discover_from_imports(program);
40    }
41
42    diagnostics.extend(validate_annotations(program, &annotation_discovery, text));
43    diagnostics.extend(validate_async_join(program, text));
44    diagnostics.extend(validate_async_structured_concurrency(program, text));
45    diagnostics.extend(validate_interpolation_format_specs(program, text));
46    diagnostics.extend(validate_comptime_overrides(program, text));
47    diagnostics.extend(validate_comptime_side_effects(program, text));
48    diagnostics.extend(validate_comptime_builtins_context(program, text));
49    diagnostics.extend(validate_trait_bounds(program, text));
50    diagnostics.extend(validate_content_strings(program, text));
51    diagnostics.extend(validate_foreign_function_types(program, text));
52
53    let mut compiler = shape_vm::BytecodeCompiler::new();
54    compiler.set_type_diagnostic_mode(shape_vm::compiler::TypeDiagnosticMode::RecoverAll);
55    compiler.set_compile_diagnostic_mode(shape_vm::compiler::CompileDiagnosticMode::RecoverAll);
56
57    if let (Some(path), Some(cache)) = (file_path, module_cache) {
58        diagnostics.extend(validate_imports_and_register_items(
59            program,
60            text,
61            path,
62            cache,
63            workspace_root,
64            &mut compiler,
65        ));
66    }
67
68    if let Err(compile_error) = compiler.compile_with_source(program, text) {
69        let mut compile_diagnostics = error_to_diagnostic(&compile_error);
70        combine_same_line_undefined_variable_diagnostics(program, text, &mut compile_diagnostics);
71        diagnostics.extend(compile_diagnostics);
72    }
73
74    dedupe_and_cap_diagnostics(&mut diagnostics);
75    diagnostics
76}
77
78/// Validate import statements and register imported items in the compiler.
79pub fn validate_imports_and_register_items(
80    program: &Program,
81    text: &str,
82    file_path: &std::path::Path,
83    module_cache: &ModuleCache,
84    workspace_root: Option<&std::path::Path>,
85    compiler: &mut shape_vm::BytecodeCompiler,
86) -> Vec<Diagnostic> {
87    let mut diagnostics = Vec::new();
88    let importable_modules = module_cache.list_importable_modules_with_context_and_source(
89        file_path,
90        workspace_root,
91        Some(text),
92    );
93    let mut known_module_names = crate::completion::imports::module_names_with_context_and_source(
94        Some(file_path),
95        workspace_root,
96        Some(text),
97    );
98    known_module_names.extend(importable_modules.iter().filter_map(|module_path| {
99        module_path
100            .split('.')
101            .next()
102            .map(|segment| segment.to_string())
103    }));
104
105    for item in &program.items {
106        if let Item::Import(import_stmt, import_span) = item {
107            match &import_stmt.items {
108                ImportItems::Named(_) => {
109                    if let Some(module_info) = module_cache
110                        .load_module_by_import_with_context_and_source(
111                            &import_stmt.from,
112                            file_path,
113                            workspace_root,
114                            Some(text),
115                        )
116                    {
117                        compiler.register_imported_items(&module_info.program.items);
118                    } else {
119                        diagnostics.push(make_span_diagnostic(
120                            text,
121                            *import_span,
122                            format!(
123                                "Cannot resolve module '{}'. Verify the import path and declare dependencies in shape.toml when needed.",
124                                import_stmt.from
125                            ),
126                            DiagnosticSeverity::ERROR,
127                        ));
128                    }
129                }
130                ImportItems::Namespace { name, .. } => {
131                    if !known_module_names.iter().any(|module| module == name) {
132                        diagnostics.push(make_span_diagnostic(
133                            text,
134                            *import_span,
135                            format!(
136                                "Cannot resolve module '{}'. Verify the import path and declare dependencies in shape.toml when needed.",
137                                name
138                            ),
139                            DiagnosticSeverity::ERROR,
140                        ));
141                    }
142                }
143            }
144        }
145    }
146
147    diagnostics
148}
149
150fn make_span_diagnostic(
151    text: &str,
152    span: shape_ast::ast::Span,
153    message: String,
154    severity: DiagnosticSeverity,
155) -> Diagnostic {
156    let (start_line, start_col) = offset_to_line_col(text, span.start);
157    let (end_line, end_col) = offset_to_line_col(text, span.end);
158    Diagnostic {
159        range: Range {
160            start: Position {
161                line: start_line,
162                character: start_col,
163            },
164            end: Position {
165                line: end_line,
166                character: end_col,
167            },
168        },
169        severity: Some(severity),
170        message,
171        source: Some("shape".to_string()),
172        ..Default::default()
173    }
174}
175
176fn combine_same_line_undefined_variable_diagnostics(
177    program: &Program,
178    text: &str,
179    diagnostics: &mut Vec<Diagnostic>,
180) {
181    let mut undefined_diag_indices_by_line: HashMap<u32, Vec<usize>> = HashMap::new();
182    for (index, diagnostic) in diagnostics.iter().enumerate() {
183        if is_undefined_variable_message(&diagnostic.message) {
184            undefined_diag_indices_by_line
185                .entry(diagnostic.range.start.line)
186                .or_default()
187                .push(index);
188        }
189    }
190
191    if undefined_diag_indices_by_line.is_empty() {
192        return;
193    }
194
195    let undefined_names_by_line = collect_undefined_identifier_names_by_line(program, text);
196    if undefined_names_by_line.is_empty() {
197        return;
198    }
199
200    let mut indices_to_drop: HashSet<usize> = HashSet::new();
201
202    for (line, diag_indices) in undefined_diag_indices_by_line {
203        let Some(undefined_names) = undefined_names_by_line.get(&line) else {
204            continue;
205        };
206
207        if undefined_names.len() <= 1 {
208            continue;
209        }
210
211        let first_index = diag_indices[0];
212        diagnostics[first_index].message = format!(
213            "Undefined variables: {}",
214            undefined_names
215                .iter()
216                .map(|name| format!("'{}'", name))
217                .collect::<Vec<_>>()
218                .join(", ")
219        );
220
221        for index in diag_indices.into_iter().skip(1) {
222            indices_to_drop.insert(index);
223        }
224    }
225
226    if indices_to_drop.is_empty() {
227        return;
228    }
229
230    let mut filtered = Vec::with_capacity(diagnostics.len().saturating_sub(indices_to_drop.len()));
231    for (index, diagnostic) in diagnostics.drain(..).enumerate() {
232        if !indices_to_drop.contains(&index) {
233            filtered.push(diagnostic);
234        }
235    }
236    *diagnostics = filtered;
237}
238
/// Report whether `message` is a singular "undefined variable" message.
///
/// The quoted (`Undefined variable: 'x'`) and unquoted (`Undefined variable: x`) forms
/// share the prefix `"Undefined variable: "`, so one prefix test covers both; a separate
/// check for the quoted form would be redundant. The combined
/// `Undefined variables: ...` form does not match.
fn is_undefined_variable_message(message: &str) -> bool {
    message.starts_with("Undefined variable: ")
}
242
243#[derive(Default)]
244struct IdentifierCollector {
245    identifiers: Vec<(String, shape_ast::ast::Span)>,
246}
247
248impl Visitor for IdentifierCollector {
249    fn visit_expr(&mut self, expr: &Expr) -> bool {
250        if let Expr::Identifier(name, span) = expr
251            && !span.is_dummy()
252        {
253            self.identifiers.push((name.clone(), *span));
254        }
255        true
256    }
257}
258
259fn collect_undefined_identifier_names_by_line(
260    program: &Program,
261    text: &str,
262) -> HashMap<u32, Vec<String>> {
263    let scope_tree = ScopeTree::build(program, text);
264    let mut collector = IdentifierCollector::default();
265    walk_program(&mut collector, program);
266
267    let mut by_line_with_offsets: HashMap<u32, Vec<(usize, String)>> = HashMap::new();
268    for (name, span) in collector.identifiers {
269        if scope_tree.binding_at(span.start).is_some() {
270            continue;
271        }
272        let (line, _) = offset_to_line_col(text, span.start);
273        by_line_with_offsets
274            .entry(line)
275            .or_default()
276            .push((span.start, name));
277    }
278
279    let mut by_line: HashMap<u32, Vec<String>> = HashMap::new();
280    for (line, mut names_with_offsets) in by_line_with_offsets {
281        names_with_offsets.sort_by_key(|(offset, _)| *offset);
282        let mut seen = HashSet::new();
283        let mut names = Vec::new();
284        for (_, name) in names_with_offsets {
285            if seen.insert(name.clone()) {
286                names.push(name);
287            }
288        }
289        if !names.is_empty() {
290            by_line.insert(line, names);
291        }
292    }
293
294    by_line
295}
296
297fn dedupe_and_cap_diagnostics(diagnostics: &mut Vec<Diagnostic>) {
298    let mut seen = HashSet::new();
299    diagnostics.retain(|diagnostic| seen.insert(diagnostic_dedupe_key(diagnostic)));
300    if diagnostics.len() > MAX_SEMANTIC_DIAGNOSTICS {
301        diagnostics.truncate(MAX_SEMANTIC_DIAGNOSTICS);
302    }
303}
304
305fn diagnostic_dedupe_key(diagnostic: &Diagnostic) -> String {
306    format!(
307        "{}:{}:{}",
308        diagnostic.range.start.line,
309        diagnostic.range.start.character,
310        normalize_diagnostic_message(&diagnostic.message)
311    )
312}
313
314fn normalize_diagnostic_message(message: &str) -> String {
315    if let Some(canonical) = canonicalize_undefined_variable_message(message) {
316        return canonical;
317    }
318    message.split_whitespace().collect::<Vec<_>>().join(" ")
319}
320
/// Reduce an `Undefined variable:` message to a canonical, quote-insensitive form.
///
/// Returns `None` when `message` does not start with `Undefined variable:`. Otherwise
/// the text after the prefix is trimmed, leading single quotes are dropped, and the
/// leading run of alphanumeric / `_` characters is taken as the variable name:
///
/// * non-empty name -> `Some("undefined variable:<name>")`
/// * empty name     -> `Some("undefined variable")`
///
/// As a result `Undefined variable: 'x'` and `Undefined variable: x` canonicalize to the
/// same string.
fn canonicalize_undefined_variable_message(message: &str) -> Option<String> {
    const PREFIX: &str = "Undefined variable:";

    // `strip_prefix` both tests for and removes the prefix, with no manual slicing.
    let rest = message.strip_prefix(PREFIX)?.trim();
    let name: String = rest
        .trim_start_matches('\'')
        .chars()
        .take_while(|ch| ch.is_alphanumeric() || *ch == '_')
        .collect();

    Some(if name.is_empty() {
        "undefined variable".to_string()
    } else {
        format!("undefined variable:{}", name)
    })
}
338
#[cfg(test)]
mod tests {
    use super::*;
    use shape_ast::parser::parse_program;

    /// Borrow every diagnostic message, preserving order.
    fn messages_of(diagnostics: &[Diagnostic]) -> Vec<&str> {
        diagnostics.iter().map(|d| d.message.as_str()).collect()
    }

    /// Pair every diagnostic's start line with its message, for failure output.
    fn lines_and_messages(diagnostics: &[Diagnostic]) -> Vec<(u32, &str)> {
        diagnostics
            .iter()
            .map(|d| (d.range.start.line, d.message.as_str()))
            .collect()
    }

    /// True when some singular undefined-variable diagnostic starts on `line`.
    fn has_undefined_variable_on_line(diagnostics: &[Diagnostic], line: u32) -> bool {
        diagnostics.iter().any(|diag| {
            diag.range.start.line == line && is_undefined_variable_message(&diag.message)
        })
    }

    /// Messages that report unsolved type constraints.
    fn type_constraint_messages(diagnostics: &[Diagnostic]) -> Vec<&str> {
        diagnostics
            .iter()
            .map(|d| d.message.as_str())
            .filter(|m| m.contains("Could not solve type constraints"))
            .collect()
    }

    /// Bindings introduced by a named decomposition (`f`, `g`) must be extracted as symbols
    /// and must not be reported as undefined variables.
    #[test]
    fn semantic_analysis_keeps_named_decomposition_bindings_defined() {
        let source = r#"let a = { x: 1}
let b = { z: 3}
//print(a.y) //compiler error: no y (even though a has y in the shape via optimistic hoisting, see next line)
a.y = 2
print(a.y) //works!
let c = a+b //resulting type is {x: int, y: int, z: int}
//destructuring works, e.g.
let (d:{x}, e: {y, z})  = c
//destructuring to named structs works also but need the as keyword:
type TypeA {x: int, y: int}
type TypeB {z: int}
let (f:TypeA, g: TypeB) = c as (TypeA+TypeB)
print(f, g)
"#;

        let program = parse_program(source).expect("program should parse");
        let symbols = crate::symbols::extract_symbols(&program);
        let symbol_names = || symbols.iter().map(|s| s.name.as_str()).collect::<Vec<_>>();
        assert!(
            symbols.iter().any(|s| s.name == "f"),
            "parser/symbol extraction should include decomposition binding f: {:?}",
            symbol_names()
        );
        assert!(
            symbols.iter().any(|s| s.name == "g"),
            "parser/symbol extraction should include decomposition binding g: {:?}",
            symbol_names()
        );

        let scratch_dir = tempfile::tempdir().expect("tempdir");
        let script_path = scratch_dir.path().join("script.shape");
        std::fs::write(&script_path, source).expect("write source");
        let cache = ModuleCache::new();

        let diagnostics =
            analyze_program_semantics(&program, source, Some(&script_path), Some(&cache), None);
        let messages = messages_of(&diagnostics);

        assert!(
            !messages
                .iter()
                .any(|message| message.contains("Undefined variable: 'f'")),
            "unexpected diagnostics: {:?}",
            messages
        );
        assert!(
            !messages
                .iter()
                .any(|message| message.contains("Undefined variable: 'g'")),
            "unexpected diagnostics: {:?}",
            messages
        );
    }

    /// Two undefined names on one line collapse into a single combined diagnostic.
    #[test]
    fn semantic_analysis_combines_undefined_variables_on_same_line() {
        let source = "print(h, i)\n";
        let program = parse_program(source).expect("program should parse");

        let diagnostics = analyze_program_semantics(&program, source, None, None, None);
        let messages = messages_of(&diagnostics);

        assert!(
            messages
                .iter()
                .any(|message| message.contains("Undefined variables: 'h', 'i'")),
            "expected combined undefined variable diagnostic, got {:?}",
            messages
        );
        assert!(
            !messages
                .iter()
                .any(|message| message.contains("Undefined variable: 'h'")),
            "did not expect singular undefined diagnostic for h, got {:?}",
            messages
        );
    }

    /// Undefined names on different lines each keep their own diagnostic.
    #[test]
    fn semantic_analysis_reports_undefined_variables_on_multiple_lines() {
        let source = "print(h)\nprint(i)\n";
        let program = parse_program(source).expect("program should parse");

        let diagnostics = analyze_program_semantics(&program, source, None, None, None);

        assert!(
            has_undefined_variable_on_line(&diagnostics, 0),
            "expected undefined variable diagnostic on line 0, got {:?}",
            lines_and_messages(&diagnostics)
        );
        assert!(
            has_undefined_variable_on_line(&diagnostics, 1),
            "expected undefined variable diagnostic on line 1, got {:?}",
            lines_and_messages(&diagnostics)
        );
    }

    /// Combining on one line must not swallow the diagnostic on the following line.
    #[test]
    fn semantic_analysis_combines_same_line_and_keeps_next_line_diagnostic() {
        let source = "print(h, i)\nprint(j)\n";
        let program = parse_program(source).expect("program should parse");

        let diagnostics = analyze_program_semantics(&program, source, None, None, None);
        let messages = messages_of(&diagnostics);

        assert!(
            messages
                .iter()
                .any(|message| message.contains("Undefined variables: 'h', 'i'")),
            "expected combined diagnostic for line 0, got {:?}",
            messages
        );
        assert!(
            has_undefined_variable_on_line(&diagnostics, 1),
            "expected undefined diagnostic on line 1, got {:?}",
            lines_and_messages(&diagnostics)
        );
    }

    /// Calling a foreign function declared after a frontmatter block must not produce
    /// unsolved type-constraint diagnostics.
    #[test]
    fn semantic_analysis_frontmatter_foreign_function_percentile_call_has_no_type_mismatch() {
        let source = r#"---
[[extensions]]
name = "python"
path = "/tmp/libshape_ext_python.so"
---
fn python percentile(values: Array<number>, pct: number) -> number {
  sorted_v = sorted(values)
  k = (len(sorted_v) - 1) * (pct / 100.0)
  f = int(k)
  c = f + 1
  if c >= len(sorted_v):
    return sorted_v[-1]
  return sorted_v[f] + (k - f) * (sorted_v[c] - sorted_v[f])
}

print(percentile([1.0, 2.0, 3.0], 50.0))
"#;

        let parse_source = crate::util::parser_source(source);
        let program = parse_program(parse_source.as_ref()).expect("program should parse");

        let percentile_def = program
            .items
            .iter()
            .find_map(|item| match item {
                Item::ForeignFunction(def, _) if def.name == "percentile" => Some(def),
                _ => None,
            })
            .expect("percentile foreign function should be present");
        let first_annotation = percentile_def
            .params
            .first()
            .and_then(|p| p.type_annotation.as_ref())
            .expect("first param annotation");
        assert_eq!(
            first_annotation.to_type_string(),
            "Array<number>",
            "unexpected foreign parameter annotation AST: {:?}",
            first_annotation
        );

        let diagnostics = analyze_program_semantics(&program, source, None, None, None);
        let mismatches = type_constraint_messages(&diagnostics);
        assert!(
            mismatches.is_empty(),
            "unexpected type constraint diagnostics: {:?}",
            mismatches
        );
    }

    /// Passing an array of a user-declared struct type to a foreign function must not
    /// produce unsolved type-constraint diagnostics.
    #[test]
    fn semantic_analysis_foreign_function_accepts_struct_array_argument() {
        let source = r#"type Measurement {
  timestamp: string,
  value: number,
  sensor_id: string,
}

fn python outlier_ratio(readings: Array<Measurement>, z_threshold: number) -> number {
  values = [r['value'] for r in readings]
  mean = sum(values) / len(values)
  std = (sum((v - mean) ** 2 for v in values) / len(values)) ** 0.5
  outliers = [v for v in values if abs(v - mean) > z_threshold * std]
  return len(outliers) / len(values)
}

let readings: Array<Measurement> = [
  { timestamp: "2026-02-22T10:00:00Z", value: 10.0, sensor_id: "A" },
  { timestamp: "2026-02-22T10:01:00Z", value: 10.5, sensor_id: "A" },
  { timestamp: "2026-02-22T10:02:00Z", value: 9.8, sensor_id: "A" },
  { timestamp: "2026-02-22T10:03:00Z", value: 10.2, sensor_id: "A" },
  { timestamp: "2026-02-22T10:04:00Z", value: 35.0, sensor_id: "A" }
]

print(outlier_ratio(readings, 1.5))
"#;

        let program = parse_program(source).expect("program should parse");
        let diagnostics = analyze_program_semantics(&program, source, None, None, None);

        let mismatches = type_constraint_messages(&diagnostics);
        assert!(
            mismatches.is_empty(),
            "unexpected type constraint diagnostics: {:?}",
            mismatches
        );
    }
}