1use crate::annotation_discovery::AnnotationDiscovery;
7use crate::diagnostics::{
8 error_to_diagnostic, validate_annotations, validate_async_join,
9 validate_async_structured_concurrency, validate_comptime_builtins_context,
10 validate_comptime_overrides, validate_comptime_side_effects, validate_content_strings,
11 validate_foreign_function_types, validate_interpolation_format_specs, validate_trait_bounds,
12};
13use crate::module_cache::ModuleCache;
14use crate::scope::ScopeTree;
15use crate::util::offset_to_line_col;
16use shape_ast::ast::{Expr, ImportItems, Item, Program};
17use shape_runtime::visitor::{Visitor, walk_program};
18use std::collections::{HashMap, HashSet};
19use tower_lsp_server::ls_types::{Diagnostic, DiagnosticSeverity, Position, Range};
20
/// Maximum number of diagnostics kept by `dedupe_and_cap_diagnostics`;
/// entries beyond this count are truncated after de-duplication.
const MAX_SEMANTIC_DIAGNOSTICS: usize = 200;
22
23pub fn analyze_program_semantics(
25 program: &Program,
26 text: &str,
27 file_path: Option<&std::path::Path>,
28 module_cache: Option<&ModuleCache>,
29 workspace_root: Option<&std::path::Path>,
30) -> Vec<Diagnostic> {
31 let mut diagnostics = Vec::new();
32
33 let mut annotation_discovery = AnnotationDiscovery::new();
35 annotation_discovery.discover_from_program(program);
36 if let (Some(path), Some(cache)) = (file_path, module_cache) {
37 annotation_discovery.discover_from_imports_with_cache(program, path, cache, workspace_root);
38 } else {
39 annotation_discovery.discover_from_imports(program);
40 }
41
42 diagnostics.extend(validate_annotations(program, &annotation_discovery, text));
43 diagnostics.extend(validate_async_join(program, text));
44 diagnostics.extend(validate_async_structured_concurrency(program, text));
45 diagnostics.extend(validate_interpolation_format_specs(program, text));
46 diagnostics.extend(validate_comptime_overrides(program, text));
47 diagnostics.extend(validate_comptime_side_effects(program, text));
48 diagnostics.extend(validate_comptime_builtins_context(program, text));
49 diagnostics.extend(validate_trait_bounds(program, text));
50 diagnostics.extend(validate_content_strings(program, text));
51 diagnostics.extend(validate_foreign_function_types(program, text));
52
53 let mut compiler = shape_vm::BytecodeCompiler::new();
54 compiler.set_type_diagnostic_mode(shape_vm::compiler::TypeDiagnosticMode::RecoverAll);
55 compiler.set_compile_diagnostic_mode(shape_vm::compiler::CompileDiagnosticMode::RecoverAll);
56
57 if let (Some(path), Some(cache)) = (file_path, module_cache) {
58 diagnostics.extend(validate_imports_and_register_items(
59 program,
60 text,
61 path,
62 cache,
63 workspace_root,
64 &mut compiler,
65 ));
66 }
67
68 if let Err(compile_error) = compiler.compile_with_source(program, text) {
69 let mut compile_diagnostics = error_to_diagnostic(&compile_error);
70 combine_same_line_undefined_variable_diagnostics(program, text, &mut compile_diagnostics);
71 diagnostics.extend(compile_diagnostics);
72 }
73
74 dedupe_and_cap_diagnostics(&mut diagnostics);
75 diagnostics
76}
77
78pub fn validate_imports_and_register_items(
80 program: &Program,
81 text: &str,
82 file_path: &std::path::Path,
83 module_cache: &ModuleCache,
84 workspace_root: Option<&std::path::Path>,
85 compiler: &mut shape_vm::BytecodeCompiler,
86) -> Vec<Diagnostic> {
87 let mut diagnostics = Vec::new();
88 let importable_modules = module_cache.list_importable_modules_with_context_and_source(
89 file_path,
90 workspace_root,
91 Some(text),
92 );
93 let mut known_module_names = crate::completion::imports::module_names_with_context_and_source(
94 Some(file_path),
95 workspace_root,
96 Some(text),
97 );
98 known_module_names.extend(importable_modules.iter().filter_map(|module_path| {
99 module_path
100 .split('.')
101 .next()
102 .map(|segment| segment.to_string())
103 }));
104
105 for item in &program.items {
106 if let Item::Import(import_stmt, import_span) = item {
107 match &import_stmt.items {
108 ImportItems::Named(_) => {
109 if let Some(module_info) = module_cache
110 .load_module_by_import_with_context_and_source(
111 &import_stmt.from,
112 file_path,
113 workspace_root,
114 Some(text),
115 )
116 {
117 compiler.register_imported_items(&module_info.program.items);
118 } else {
119 diagnostics.push(make_span_diagnostic(
120 text,
121 *import_span,
122 format!(
123 "Cannot resolve module '{}'. Verify the import path and declare dependencies in shape.toml when needed.",
124 import_stmt.from
125 ),
126 DiagnosticSeverity::ERROR,
127 ));
128 }
129 }
130 ImportItems::Namespace { name, .. } => {
131 if !known_module_names.iter().any(|module| module == name) {
132 diagnostics.push(make_span_diagnostic(
133 text,
134 *import_span,
135 format!(
136 "Cannot resolve module '{}'. Verify the import path and declare dependencies in shape.toml when needed.",
137 name
138 ),
139 DiagnosticSeverity::ERROR,
140 ));
141 }
142 }
143 }
144 }
145 }
146
147 diagnostics
148}
149
150fn make_span_diagnostic(
151 text: &str,
152 span: shape_ast::ast::Span,
153 message: String,
154 severity: DiagnosticSeverity,
155) -> Diagnostic {
156 let (start_line, start_col) = offset_to_line_col(text, span.start);
157 let (end_line, end_col) = offset_to_line_col(text, span.end);
158 Diagnostic {
159 range: Range {
160 start: Position {
161 line: start_line,
162 character: start_col,
163 },
164 end: Position {
165 line: end_line,
166 character: end_col,
167 },
168 },
169 severity: Some(severity),
170 message,
171 source: Some("shape".to_string()),
172 ..Default::default()
173 }
174}
175
176fn combine_same_line_undefined_variable_diagnostics(
177 program: &Program,
178 text: &str,
179 diagnostics: &mut Vec<Diagnostic>,
180) {
181 let mut undefined_diag_indices_by_line: HashMap<u32, Vec<usize>> = HashMap::new();
182 for (index, diagnostic) in diagnostics.iter().enumerate() {
183 if is_undefined_variable_message(&diagnostic.message) {
184 undefined_diag_indices_by_line
185 .entry(diagnostic.range.start.line)
186 .or_default()
187 .push(index);
188 }
189 }
190
191 if undefined_diag_indices_by_line.is_empty() {
192 return;
193 }
194
195 let undefined_names_by_line = collect_undefined_identifier_names_by_line(program, text);
196 if undefined_names_by_line.is_empty() {
197 return;
198 }
199
200 let mut indices_to_drop: HashSet<usize> = HashSet::new();
201
202 for (line, diag_indices) in undefined_diag_indices_by_line {
203 let Some(undefined_names) = undefined_names_by_line.get(&line) else {
204 continue;
205 };
206
207 if undefined_names.len() <= 1 {
208 continue;
209 }
210
211 let first_index = diag_indices[0];
212 diagnostics[first_index].message = format!(
213 "Undefined variables: {}",
214 undefined_names
215 .iter()
216 .map(|name| format!("'{}'", name))
217 .collect::<Vec<_>>()
218 .join(", ")
219 );
220
221 for index in diag_indices.into_iter().skip(1) {
222 indices_to_drop.insert(index);
223 }
224 }
225
226 if indices_to_drop.is_empty() {
227 return;
228 }
229
230 let mut filtered = Vec::with_capacity(diagnostics.len().saturating_sub(indices_to_drop.len()));
231 for (index, diagnostic) in diagnostics.drain(..).enumerate() {
232 if !indices_to_drop.contains(&index) {
233 filtered.push(diagnostic);
234 }
235 }
236 *diagnostics = filtered;
237}
238
/// Returns `true` when `message` is a singular undefined-variable diagnostic,
/// i.e. it begins with `"Undefined variable: "` (quoted or unquoted name).
///
/// The combined plural form (`"Undefined variables: ..."`) does not match.
fn is_undefined_variable_message(message: &str) -> bool {
    // The quoted form ("Undefined variable: '") shares this prefix, so one check covers both.
    message.starts_with("Undefined variable: ")
}
242
/// Visitor state that accumulates identifier expressions together with their spans.
#[derive(Default)]
struct IdentifierCollector {
    /// `(name, span)` pairs, in the order the visitor recorded them.
    identifiers: Vec<(String, shape_ast::ast::Span)>,
}
247
248impl Visitor for IdentifierCollector {
249 fn visit_expr(&mut self, expr: &Expr) -> bool {
250 if let Expr::Identifier(name, span) = expr
251 && !span.is_dummy()
252 {
253 self.identifiers.push((name.clone(), *span));
254 }
255 true
256 }
257}
258
259fn collect_undefined_identifier_names_by_line(
260 program: &Program,
261 text: &str,
262) -> HashMap<u32, Vec<String>> {
263 let scope_tree = ScopeTree::build(program, text);
264 let mut collector = IdentifierCollector::default();
265 walk_program(&mut collector, program);
266
267 let mut by_line_with_offsets: HashMap<u32, Vec<(usize, String)>> = HashMap::new();
268 for (name, span) in collector.identifiers {
269 if scope_tree.binding_at(span.start).is_some() {
270 continue;
271 }
272 let (line, _) = offset_to_line_col(text, span.start);
273 by_line_with_offsets
274 .entry(line)
275 .or_default()
276 .push((span.start, name));
277 }
278
279 let mut by_line: HashMap<u32, Vec<String>> = HashMap::new();
280 for (line, mut names_with_offsets) in by_line_with_offsets {
281 names_with_offsets.sort_by_key(|(offset, _)| *offset);
282 let mut seen = HashSet::new();
283 let mut names = Vec::new();
284 for (_, name) in names_with_offsets {
285 if seen.insert(name.clone()) {
286 names.push(name);
287 }
288 }
289 if !names.is_empty() {
290 by_line.insert(line, names);
291 }
292 }
293
294 by_line
295}
296
297fn dedupe_and_cap_diagnostics(diagnostics: &mut Vec<Diagnostic>) {
298 let mut seen = HashSet::new();
299 diagnostics.retain(|diagnostic| seen.insert(diagnostic_dedupe_key(diagnostic)));
300 if diagnostics.len() > MAX_SEMANTIC_DIAGNOSTICS {
301 diagnostics.truncate(MAX_SEMANTIC_DIAGNOSTICS);
302 }
303}
304
305fn diagnostic_dedupe_key(diagnostic: &Diagnostic) -> String {
306 format!(
307 "{}:{}:{}",
308 diagnostic.range.start.line,
309 diagnostic.range.start.character,
310 normalize_diagnostic_message(&diagnostic.message)
311 )
312}
313
314fn normalize_diagnostic_message(message: &str) -> String {
315 if let Some(canonical) = canonicalize_undefined_variable_message(message) {
316 return canonical;
317 }
318 message.split_whitespace().collect::<Vec<_>>().join(" ")
319}
320
/// Reduces an `Undefined variable:` message to a canonical, quote-insensitive form.
///
/// Returns `None` when `message` does not start with `Undefined variable:`.
/// Otherwise the text after the prefix is trimmed, leading single quotes are
/// dropped, and the longest leading run of alphanumeric/underscore characters
/// is taken as the name. The result is `undefined variable:<name>`, or just
/// `undefined variable` when no name could be extracted.
fn canonicalize_undefined_variable_message(message: &str) -> Option<String> {
    let after_prefix = message.strip_prefix("Undefined variable:")?;
    let unquoted = after_prefix.trim().trim_start_matches('\'');

    // The name ends at the first character that cannot be part of an identifier.
    let name_end = unquoted
        .find(|ch: char| !(ch.is_alphanumeric() || ch == '_'))
        .unwrap_or(unquoted.len());
    let name = &unquoted[..name_end];

    let canonical = if name.is_empty() {
        "undefined variable".to_string()
    } else {
        format!("undefined variable:{}", name)
    };
    Some(canonical)
}
338
339#[cfg(test)]
340mod tests {
341 use super::*;
342 use shape_ast::parser::parse_program;
343
344 #[test]
345 fn semantic_analysis_keeps_named_decomposition_bindings_defined() {
346 let source = r#"let a = { x: 1}
347let b = { z: 3}
348//print(a.y) //compiler error: no y (even though a has y in the shape via optimistic hoisting, see next line)
349a.y = 2
350print(a.y) //works!
351let c = a+b //resulting type is {x: int, y: int, z: int}
352//destructuring works, e.g.
353let (d:{x}, e: {y, z}) = c
354//destructuring to named structs works also but need the as keyword:
355type TypeA {x: int, y: int}
356type TypeB {z: int}
357let (f:TypeA, g: TypeB) = c as (TypeA+TypeB)
358print(f, g)
359"#;
360
361 let program = parse_program(source).expect("program should parse");
362 let symbols = crate::symbols::extract_symbols(&program);
363 assert!(
364 symbols.iter().any(|s| s.name == "f"),
365 "parser/symbol extraction should include decomposition binding f: {:?}",
366 symbols.iter().map(|s| s.name.as_str()).collect::<Vec<_>>()
367 );
368 assert!(
369 symbols.iter().any(|s| s.name == "g"),
370 "parser/symbol extraction should include decomposition binding g: {:?}",
371 symbols.iter().map(|s| s.name.as_str()).collect::<Vec<_>>()
372 );
373 let temp_dir = tempfile::tempdir().expect("tempdir");
374 let file_path = temp_dir.path().join("script.shape");
375 std::fs::write(&file_path, source).expect("write source");
376 let module_cache = ModuleCache::new();
377
378 let diagnostics = analyze_program_semantics(
379 &program,
380 source,
381 Some(&file_path),
382 Some(&module_cache),
383 None,
384 );
385
386 assert!(
387 diagnostics
388 .iter()
389 .all(|diag| !diag.message.contains("Undefined variable: 'f'")),
390 "unexpected diagnostics: {:?}",
391 diagnostics
392 .iter()
393 .map(|d| d.message.as_str())
394 .collect::<Vec<_>>()
395 );
396 assert!(
397 diagnostics
398 .iter()
399 .all(|diag| !diag.message.contains("Undefined variable: 'g'")),
400 "unexpected diagnostics: {:?}",
401 diagnostics
402 .iter()
403 .map(|d| d.message.as_str())
404 .collect::<Vec<_>>()
405 );
406 }
407
408 #[test]
409 fn semantic_analysis_combines_undefined_variables_on_same_line() {
410 let source = "print(h, i)\n";
411 let program = parse_program(source).expect("program should parse");
412
413 let diagnostics = analyze_program_semantics(&program, source, None, None, None);
414
415 let messages: Vec<&str> = diagnostics.iter().map(|d| d.message.as_str()).collect();
416 assert!(
417 messages
418 .iter()
419 .any(|message| message.contains("Undefined variables: 'h', 'i'")),
420 "expected combined undefined variable diagnostic, got {:?}",
421 messages
422 );
423 assert!(
424 messages
425 .iter()
426 .all(|message| !message.contains("Undefined variable: 'h'")),
427 "did not expect singular undefined diagnostic for h, got {:?}",
428 messages
429 );
430 }
431
432 #[test]
433 fn semantic_analysis_reports_undefined_variables_on_multiple_lines() {
434 let source = "print(h)\nprint(i)\n";
435 let program = parse_program(source).expect("program should parse");
436
437 let diagnostics = analyze_program_semantics(&program, source, None, None, None);
438
439 assert!(
440 diagnostics.iter().any(|diag| {
441 diag.range.start.line == 0 && is_undefined_variable_message(&diag.message)
442 }),
443 "expected undefined variable diagnostic on line 0, got {:?}",
444 diagnostics
445 .iter()
446 .map(|d| (d.range.start.line, d.message.as_str()))
447 .collect::<Vec<_>>()
448 );
449 assert!(
450 diagnostics.iter().any(|diag| {
451 diag.range.start.line == 1 && is_undefined_variable_message(&diag.message)
452 }),
453 "expected undefined variable diagnostic on line 1, got {:?}",
454 diagnostics
455 .iter()
456 .map(|d| (d.range.start.line, d.message.as_str()))
457 .collect::<Vec<_>>()
458 );
459 }
460
461 #[test]
462 fn semantic_analysis_combines_same_line_and_keeps_next_line_diagnostic() {
463 let source = "print(h, i)\nprint(j)\n";
464 let program = parse_program(source).expect("program should parse");
465
466 let diagnostics = analyze_program_semantics(&program, source, None, None, None);
467 let messages: Vec<&str> = diagnostics.iter().map(|d| d.message.as_str()).collect();
468
469 assert!(
470 messages
471 .iter()
472 .any(|message| message.contains("Undefined variables: 'h', 'i'")),
473 "expected combined diagnostic for line 0, got {:?}",
474 messages
475 );
476 assert!(
477 diagnostics.iter().any(|diag| {
478 diag.range.start.line == 1 && is_undefined_variable_message(&diag.message)
479 }),
480 "expected undefined diagnostic on line 1, got {:?}",
481 diagnostics
482 .iter()
483 .map(|d| (d.range.start.line, d.message.as_str()))
484 .collect::<Vec<_>>()
485 );
486 }
487
488 #[test]
489 fn semantic_analysis_frontmatter_foreign_function_percentile_call_has_no_type_mismatch() {
490 let source = r#"---
491[[extensions]]
492name = "python"
493path = "/tmp/libshape_ext_python.so"
494---
495fn python percentile(values: Array<number>, pct: number) -> number {
496 sorted_v = sorted(values)
497 k = (len(sorted_v) - 1) * (pct / 100.0)
498 f = int(k)
499 c = f + 1
500 if c >= len(sorted_v):
501 return sorted_v[-1]
502 return sorted_v[f] + (k - f) * (sorted_v[c] - sorted_v[f])
503}
504
505print(percentile([1.0, 2.0, 3.0], 50.0))
506"#;
507
508 let parse_source = crate::util::parser_source(source);
509 let program = parse_program(parse_source.as_ref()).expect("program should parse");
510 let foreign_fn = program
511 .items
512 .iter()
513 .find_map(|item| match item {
514 Item::ForeignFunction(def, _) if def.name == "percentile" => Some(def),
515 _ => None,
516 })
517 .expect("percentile foreign function should be present");
518 let first_param = foreign_fn
519 .params
520 .first()
521 .and_then(|p| p.type_annotation.as_ref())
522 .expect("first param annotation");
523 assert_eq!(
524 first_param.to_type_string(),
525 "Array<number>",
526 "unexpected foreign parameter annotation AST: {:?}",
527 first_param
528 );
529 let diagnostics = analyze_program_semantics(&program, source, None, None, None);
530
531 let mismatch_messages: Vec<&str> = diagnostics
532 .iter()
533 .map(|d| d.message.as_str())
534 .filter(|m| m.contains("Could not solve type constraints"))
535 .collect();
536 assert!(
537 mismatch_messages.is_empty(),
538 "unexpected type constraint diagnostics: {:?}",
539 mismatch_messages
540 );
541 }
542
543 #[test]
544 fn semantic_analysis_foreign_function_accepts_struct_array_argument() {
545 let source = r#"type Measurement {
546 timestamp: string,
547 value: number,
548 sensor_id: string,
549}
550
551fn python outlier_ratio(readings: Array<Measurement>, z_threshold: number) -> number {
552 values = [r['value'] for r in readings]
553 mean = sum(values) / len(values)
554 std = (sum((v - mean) ** 2 for v in values) / len(values)) ** 0.5
555 outliers = [v for v in values if abs(v - mean) > z_threshold * std]
556 return len(outliers) / len(values)
557}
558
559let readings: Array<Measurement> = [
560 { timestamp: "2026-02-22T10:00:00Z", value: 10.0, sensor_id: "A" },
561 { timestamp: "2026-02-22T10:01:00Z", value: 10.5, sensor_id: "A" },
562 { timestamp: "2026-02-22T10:02:00Z", value: 9.8, sensor_id: "A" },
563 { timestamp: "2026-02-22T10:03:00Z", value: 10.2, sensor_id: "A" },
564 { timestamp: "2026-02-22T10:04:00Z", value: 35.0, sensor_id: "A" }
565]
566
567print(outlier_ratio(readings, 1.5))
568"#;
569
570 let program = parse_program(source).expect("program should parse");
571 let diagnostics = analyze_program_semantics(&program, source, None, None, None);
572
573 let mismatch_messages: Vec<&str> = diagnostics
574 .iter()
575 .map(|d| d.message.as_str())
576 .filter(|m| m.contains("Could not solve type constraints"))
577 .collect();
578 assert!(
579 mismatch_messages.is_empty(),
580 "unexpected type constraint diagnostics: {:?}",
581 mismatch_messages
582 );
583 }
584}