Skip to main content

harn_parser/typechecker/
mod.rs

1use std::collections::HashSet;
2
3use crate::ast::*;
4use crate::builtin_signatures;
5use harn_lexer::{FixEdit, Span};
6
7mod binary_ops;
8mod exits;
9mod format;
10mod inference;
11mod schema_inference;
12mod scope;
13mod union;
14
15pub use exits::{block_definitely_exits, stmt_definitely_exits};
16pub use format::{format_type, shape_mismatch_detail};
17
18use schema_inference::schema_type_expr_from_node;
19use scope::TypeScope;
20
/// A single inlay hint emitted by the type checker.
///
/// The hint is anchored at a `(line, column)` position, placed after the
/// variable name it annotates.
#[derive(Debug, Clone)]
pub struct InlayHintInfo {
    /// Line component of the position where the hint should be displayed.
    pub line: usize,
    /// Column component of the position where the hint should be displayed.
    pub column: usize,
    /// Text of the type label to render, for example `": string"`.
    pub label: String,
}
30
31/// A diagnostic produced by the type checker.
32#[derive(Debug, Clone)]
33pub struct TypeDiagnostic {
34    pub message: String,
35    pub severity: DiagnosticSeverity,
36    pub span: Option<Span>,
37    pub help: Option<String>,
38    /// Machine-applicable fix edits.
39    pub fix: Option<Vec<FixEdit>>,
40    /// Optional structured payload that higher-level tooling (e.g. the
41    /// LSP code-action provider) can consume to synthesise fixes that
42    /// need more than a static `FixEdit`. Out-of-band from `fix` so the
43    /// string-based rendering pipeline doesn't have to care.
44    pub details: Option<DiagnosticDetails>,
45}
46
/// Structured companion data that may accompany a `TypeDiagnostic`.
///
/// Each variant corresponds to exactly one kind of diagnostic that carries
/// tooling-consumable state beyond its human-readable message. A variant is
/// attached only by the sites that emit that diagnostic, so consumers can
/// pattern-match on it instead of parsing the error string.
#[derive(Debug, Clone)]
pub enum DiagnosticDetails {
    /// A `match` expression that does not cover every variant.
    ///
    /// `missing` lists the formatted literal value of each uncovered
    /// variant (strings quoted, ints bare), ready to be used as the prefix
    /// of a new arm. The owning diagnostic's `span` covers the entire
    /// `match` expression, so a code-action can find the closing `}` by
    /// reading the source at `span.end`.
    NonExhaustiveMatch { missing: Vec<String> },
}
63
/// Severity level attached to every `TypeDiagnostic`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DiagnosticSeverity {
    /// The diagnostic is an error.
    Error,
    /// The diagnostic is a warning.
    Warning,
}
69
70/// The static type checker.
71pub struct TypeChecker {
72    diagnostics: Vec<TypeDiagnostic>,
73    scope: TypeScope,
74    source: Option<String>,
75    hints: Vec<InlayHintInfo>,
76    /// When true, flag unvalidated boundary-API values used in field access.
77    strict_types: bool,
78    /// Lexical depth of enclosing function-like bodies (fn/tool/pipeline/closure).
79    /// `try*` requires `fn_depth > 0` so the rethrow has a body to live in.
80    fn_depth: usize,
81    /// Maps function name -> deprecation metadata `(since, use_hint)`. Populated
82    /// when an `@deprecated` attribute is encountered on a top-level fn decl
83    /// during the `check_inner` pre-pass; consulted at every `FunctionCall`
84    /// site to emit a warning + help line.
85    deprecated_fns: std::collections::HashMap<String, (Option<String>, Option<String>)>,
86    /// Names statically known to be introduced by cross-module imports
87    /// (resolved via `harn-modules`). `Some(set)` switches the checker into
88    /// strict cross-module mode: an unresolved callable name is reported as
89    /// an error instead of silently passing through. `None` preserves the
90    /// conservative pre-v0.7.12 behavior (no cross-module undefined-name
91    /// diagnostics).
92    imported_names: Option<HashSet<String>>,
93}
94
95impl TypeChecker {
96    pub(in crate::typechecker) fn wildcard_type() -> TypeExpr {
97        TypeExpr::Named("_".into())
98    }
99
100    pub(in crate::typechecker) fn is_wildcard_type(ty: &TypeExpr) -> bool {
101        matches!(ty, TypeExpr::Named(name) if name == "_")
102    }
103
104    pub(in crate::typechecker) fn base_type_name(ty: &TypeExpr) -> Option<&str> {
105        match ty {
106            TypeExpr::Named(name) => Some(name.as_str()),
107            TypeExpr::Applied { name, .. } => Some(name.as_str()),
108            _ => None,
109        }
110    }
111
112    pub fn new() -> Self {
113        Self {
114            diagnostics: Vec::new(),
115            scope: TypeScope::new(),
116            source: None,
117            hints: Vec::new(),
118            strict_types: false,
119            fn_depth: 0,
120            deprecated_fns: std::collections::HashMap::new(),
121            imported_names: None,
122        }
123    }
124
125    /// Create a type checker with strict types mode.
126    /// When enabled, flags unvalidated boundary-API values used in field access.
127    pub fn with_strict_types(strict: bool) -> Self {
128        Self {
129            diagnostics: Vec::new(),
130            scope: TypeScope::new(),
131            source: None,
132            hints: Vec::new(),
133            strict_types: strict,
134            fn_depth: 0,
135            deprecated_fns: std::collections::HashMap::new(),
136            imported_names: None,
137        }
138    }
139
140    /// Attach the set of names statically introduced by cross-module imports.
141    ///
142    /// Enables strict cross-module undefined-call errors: call sites that are
143    /// not builtins, not local declarations, not struct constructors, not
144    /// callable variables, and not in `imported` will produce a type error.
145    ///
146    /// Passing `None` (the default) preserves pre-v0.7.12 behavior where
147    /// unresolved call names only surface via lint diagnostics. Callers
148    /// should only pass `Some(set)` when every import in the file resolved
149    /// — see `harn_modules::ModuleGraph::imported_names_for_file`.
150    pub fn with_imported_names(mut self, imported: HashSet<String>) -> Self {
151        self.imported_names = Some(imported);
152        self
153    }
154
155    /// Check a program with source text for autofix generation.
156    pub fn check_with_source(mut self, program: &[SNode], source: &str) -> Vec<TypeDiagnostic> {
157        self.source = Some(source.to_string());
158        self.check_inner(program).0
159    }
160
161    /// Check a program with strict types mode and source text.
162    pub fn check_strict_with_source(
163        mut self,
164        program: &[SNode],
165        source: &str,
166    ) -> Vec<TypeDiagnostic> {
167        self.source = Some(source.to_string());
168        self.check_inner(program).0
169    }
170
171    /// Check a program and return diagnostics.
172    pub fn check(self, program: &[SNode]) -> Vec<TypeDiagnostic> {
173        self.check_inner(program).0
174    }
175
176    /// Check whether a function call value is a boundary source that produces
177    /// unvalidated data.  Returns `None` if the value is type-safe
178    /// (e.g. llm_call with a schema option, or a non-boundary function).
179    pub(in crate::typechecker) fn detect_boundary_source(
180        value: &SNode,
181        scope: &TypeScope,
182    ) -> Option<String> {
183        match &value.node {
184            Node::FunctionCall { name, args } => {
185                if !builtin_signatures::is_untyped_boundary_source(name) {
186                    return None;
187                }
188                // llm_call/llm_completion with a schema option are type-safe
189                if (name == "llm_call" || name == "llm_completion")
190                    && Self::llm_call_has_typed_schema_option(args, scope)
191                {
192                    return None;
193                }
194                Some(name.clone())
195            }
196            Node::Identifier(name) => scope.is_untyped_source(name).map(|s| s.to_string()),
197            _ => None,
198        }
199    }
200
201    /// True if an `llm_call` / `llm_completion` options dict names a
202    /// resolvable output schema. Used by the strict-types boundary checks
203    /// to suppress "unvalidated" warnings when the call site is typed.
204    /// Actual return-type narrowing is driven by the generic-builtin
205    /// dispatch path in `infer_type`, not this helper.
206    pub(in crate::typechecker) fn llm_call_has_typed_schema_option(
207        args: &[SNode],
208        scope: &TypeScope,
209    ) -> bool {
210        let Some(opts) = args.get(2) else {
211            return false;
212        };
213        let Node::DictLiteral(entries) = &opts.node else {
214            return false;
215        };
216        entries.iter().any(|entry| {
217            let key = match &entry.key.node {
218                Node::StringLiteral(k) | Node::Identifier(k) => k.as_str(),
219                _ => return false,
220            };
221            (key == "schema" || key == "output_schema")
222                && schema_type_expr_from_node(&entry.value, scope).is_some()
223        })
224    }
225
226    /// Check whether a type annotation is a concrete shape/struct type
227    /// (as opposed to bare `dict` or no annotation).
228    pub(in crate::typechecker) fn is_concrete_type(ty: &TypeExpr) -> bool {
229        matches!(
230            ty,
231            TypeExpr::Shape(_)
232                | TypeExpr::Applied { .. }
233                | TypeExpr::FnType { .. }
234                | TypeExpr::List(_)
235                | TypeExpr::Iter(_)
236                | TypeExpr::DictType(_, _)
237        ) || matches!(ty, TypeExpr::Named(n) if n != "dict" && n != "any" && n != "_")
238    }
239
240    /// Check a program and return both diagnostics and inlay hints.
241    pub fn check_with_hints(
242        mut self,
243        program: &[SNode],
244        source: &str,
245    ) -> (Vec<TypeDiagnostic>, Vec<InlayHintInfo>) {
246        self.source = Some(source.to_string());
247        self.check_inner(program)
248    }
249
250    pub(in crate::typechecker) fn error_at(&mut self, message: String, span: Span) {
251        self.diagnostics.push(TypeDiagnostic {
252            message,
253            severity: DiagnosticSeverity::Error,
254            span: Some(span),
255            help: None,
256            fix: None,
257            details: None,
258        });
259    }
260
261    #[allow(dead_code)]
262    pub(in crate::typechecker) fn error_at_with_help(
263        &mut self,
264        message: String,
265        span: Span,
266        help: String,
267    ) {
268        self.diagnostics.push(TypeDiagnostic {
269            message,
270            severity: DiagnosticSeverity::Error,
271            span: Some(span),
272            help: Some(help),
273            fix: None,
274            details: None,
275        });
276    }
277
278    pub(in crate::typechecker) fn error_at_with_fix(
279        &mut self,
280        message: String,
281        span: Span,
282        fix: Vec<FixEdit>,
283    ) {
284        self.diagnostics.push(TypeDiagnostic {
285            message,
286            severity: DiagnosticSeverity::Error,
287            span: Some(span),
288            help: None,
289            fix: Some(fix),
290            details: None,
291        });
292    }
293
294    /// Diagnostic site for non-exhaustive `match` arms. Match arms must be
295    /// exhaustive — a missing-variant `match` is a hard error. Authors who
296    /// genuinely want partial coverage opt out with a wildcard `_` arm.
297    /// Partial `if/elif/else` chains are intentionally allowed and are
298    /// instead handled by `check_unknown_exhaustiveness`, which stays a
299    /// warning so the `unreachable()` opt-in pattern continues to work.
300    pub(in crate::typechecker) fn exhaustiveness_error_at(&mut self, message: String, span: Span) {
301        self.diagnostics.push(TypeDiagnostic {
302            message,
303            severity: DiagnosticSeverity::Error,
304            span: Some(span),
305            help: None,
306            fix: None,
307            details: None,
308        });
309    }
310
311    /// Like `exhaustiveness_error_at` but additionally attaches the
312    /// missing-variant list as structured details. LSP code-actions
313    /// read this to synthesise an "Add missing match arms" quick-fix
314    /// without string-parsing the message.
315    pub(in crate::typechecker) fn exhaustiveness_error_with_missing(
316        &mut self,
317        message: String,
318        span: Span,
319        missing: Vec<String>,
320    ) {
321        self.diagnostics.push(TypeDiagnostic {
322            message,
323            severity: DiagnosticSeverity::Error,
324            span: Some(span),
325            help: None,
326            fix: None,
327            details: Some(DiagnosticDetails::NonExhaustiveMatch { missing }),
328        });
329    }
330
331    pub(in crate::typechecker) fn warning_at(&mut self, message: String, span: Span) {
332        self.diagnostics.push(TypeDiagnostic {
333            message,
334            severity: DiagnosticSeverity::Warning,
335            span: Some(span),
336            help: None,
337            fix: None,
338            details: None,
339        });
340    }
341
342    #[allow(dead_code)]
343    pub(in crate::typechecker) fn warning_at_with_help(
344        &mut self,
345        message: String,
346        span: Span,
347        help: String,
348    ) {
349        self.diagnostics.push(TypeDiagnostic {
350            message,
351            severity: DiagnosticSeverity::Warning,
352            span: Some(span),
353            help: Some(help),
354            fix: None,
355            details: None,
356        });
357    }
358}
359
360impl Default for TypeChecker {
361    fn default() -> Self {
362        Self::new()
363    }
364}
365
366#[cfg(test)]
367mod tests;