kdl_script/types.rs
1//! The type checker and types!
2//!
3//! The entry point is [`typeck`][] which is implicitly
4//! handled by [`Compiler::compile_path`][] or [`Compiler::compile_string`][]
5//! and will produce a [`TypedProgram`][].
6//!
7//! You should then call [`TypedProgram::definition_graph`][] with your
8//! target backend's [`PunEnv`][] to resolve all the [`PunTy`]s and get a
9//! final [`DefinitionGraph`][].
10//!
11//! You should then call [`DefinitionGraph::definitions`][] with the set
12//! of functions you want to emit (usually [`TypedProgram::all_funcs`][])
13//! to get the final forward-decls and definitions your target should emit
14//! to generate its program.
15//!
16//! If a test (function) fails, you can pass just that function to
17//! [`DefinitionGraph::definitions`][] to get a minimized program for just
18//! that one function.
19//!
20//! The type system is phased like this to allow work to be reused and shared
21//! where possible. Each of the above "lowerings" represents increased levels
22//! of specificity:
23//!
24//! * [`TypedProgram`][] is abstract over all possible backends and can be computed once.
25//! * [`DefinitionGraph`][] is for a concrete backend but still abstract over what parts
26//! of the program you might care about emitting. Computed once per backend config ([`PunEnv`]).
27//! * [`DefinitionGraph::definitions`][] is the final concrete program we want to emit.
28//!
29//! In principle a backend emitting various configs for a single [`TypedProgram`][] can
30//! share everything for a specific [`TyIdx`][] or [`FuncIdx`][], except they need to be
31//! careful about [`PunTy`][]s which can have [`DefinitionGraph`][]-specific lowerings...
32//! so really you should only recycle state created for a specific [`DefinitionGraph`]!
33//!
34//! FIXME: unlike [`AliasTy`][]s, [`PunTy`][]s really *should* completely evaporate in the
35//! backend's lowering. Perhaps we should do something in [`TypedProgram`][] to actually
36//! make them transparent?
37//!
38//! While performance isn't a huge concern for this project, combinatorics do get
39//! kind of out of control so work sharing is kinda important, especially as the backends
40//! get more complex! Also it's just nice to handle backend-agnostic issues once to keep
41//! things simple and correct.
42
43use std::cmp::Ordering;
44use std::collections::HashMap;
45use std::collections::HashSet;
46use std::sync::Arc;
47
48use miette::{Diagnostic, NamedSource, SourceSpan};
49use petgraph::graph::DiGraph;
50use petgraph::graph::NodeIndex;
51use thiserror::Error;
52
53use crate::parse::*;
54use crate::spanned::*;
55use crate::Compiler;
56use crate::Result;
57
58/// An error that occured while processing the types of a program.
59#[derive(Debug, Error, Diagnostic)]
60#[error("{message}")]
61pub struct KdlScriptTypeError {
62 pub message: String,
63 #[source_code]
64 pub src: Arc<NamedSource>,
65 #[label]
66 pub span: SourceSpan,
67 #[diagnostic(help)]
68 pub help: Option<String>,
69}
70
71/// A program that has had its symbolic types resolved to actual type ids.
72///
73/// Aliases and Puns are not fully resolved at this point.
74///
75/// Aliases still exist so that you can emit the target language's form of
76/// an alias if you want to most accurately express the input program.
77///
78/// Puns still exist because a TypedProgram is abstract over every possible
79/// output language to share the workload between each concrete backend.
80/// The next step in lowering the program is to ask it to resolve
81/// the puns for a specific [`crate::PunEnv`][] with [`TypedProgram::definition_graph`].
82/// Which will also handle computing the order of declarations for languages like C.
83#[derive(Debug)]
84pub struct TypedProgram {
85 tcx: TyCtx,
86 funcs: Vec<Func>,
87 builtin_funcs_start: usize,
88}
89
90/// A type id
91pub type TyIdx = usize;
92/// A function id
93pub type FuncIdx = usize;
94
95/// The actual structure of a type
96///
97/// This may be either a nominal, structural, or primitive type.
98///
99/// Any types that this type references will already have been normalized to a [`TyIdx`][]
100/// so you don't have to worry about name resolution or interning/memoizing. Notably
101/// all uses of `[u32; 5]` will have the same [`TyIdx`][], although `[MyU32Alias; 5]` will
102/// be get a separate type id to allow a backend to more accurately reproduce the input program.
103#[derive(Debug, Clone, PartialEq, Eq, Hash)]
104pub enum Ty {
105 /// A primitive (int, float, bool, ptr)
106 Primitive(PrimitiveTy),
107 /// A nominal struct
108 Struct(StructTy),
109 /// A nominal untagged union
110 Union(UnionTy),
111 /// A nominal C-style enum (see `Tagged` for a full rust-style enum)
112 Enum(EnumTy),
113 /// A nominal tagged union (rust-style enum, see `Enum` for a c-style enum)
114 Tagged(TaggedTy),
115 /// A transparent type alias (like typed)
116 Alias(AliasTy),
117 /// A type pun that can have different underlying types for different targets
118 Pun(PunTy),
119 /// A fixed-length array
120 Array(ArrayTy),
121 /// A reference to a type (behaves as if is the Pointee, but just passed by-ref)
122 Ref(RefTy),
123 /// Empty tuple -- `()`
124 Empty,
125}
126
127impl Ty {
128 pub fn is_nominal(&self) -> bool {
129 match self {
130 Ty::Primitive(_) => false,
131 Ty::Struct(_) => true,
132 Ty::Union(_) => true,
133 Ty::Enum(_) => true,
134 Ty::Tagged(_) => true,
135 Ty::Alias(_) => true,
136 Ty::Pun(_) => true,
137 Ty::Array(_) => false,
138 Ty::Ref(_) => false,
139 Ty::Empty => false,
140 }
141 }
142}
143
144/// A function
145#[derive(Debug, Clone)]
146pub struct Func {
147 /// The function's name
148 pub name: Ident,
149 /// The function's inputs
150 pub inputs: Vec<Arg>,
151 /// The function's outputs (note that outparams will appear as Ty::Ref outputs!)
152 pub outputs: Vec<Arg>,
153 /// Any attributes hanging off the function
154 pub attrs: Vec<Attr>,
155 #[cfg(feature = "eval")]
156 /// The body of the function (TBD, not needed for abi-cafe)
157 pub body: (),
158}
159
160/// A function argument (input or output).
161#[derive(Debug, Clone)]
162pub struct Arg {
163 /// The name of the argument
164 pub name: Ident,
165 /// The type of the arg
166 pub ty: TyIdx,
167}
168
169/// A primitive
170#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
171pub enum PrimitiveTy {
172 /// `i8` / `int8_t`
173 I8,
174 /// `i16` / `int16_t`
175 I16,
176 /// `i32` / `int32_t`
177 I32,
178 /// `i64` / `int64_t`
179 I64,
180 /// `i128` / `int128_t`
181 I128,
182 /// `i256` / `int256_t`
183 I256,
184 /// `u8` / `uint8_t`
185 U8,
186 /// `u16` / `uint16_t`
187 U16,
188 /// `u32` / `uint32_t`
189 U32,
190 /// `u64` / `uint64_t`
191 U64,
192 /// `u128` / `uint128_t`
193 U128,
194 /// `u256` / `uint256_t`
195 U256,
196 /// `f16` / `half`
197 F16,
198 /// `f32` / `float`
199 F32,
200 /// `f64` / `double`
201 F64,
202 /// `f128` / `quad`
203 F128,
204 /// `bool`
205 Bool,
206 /// An opaque pointer (like `void*`)
207 Ptr,
208}
209
210pub const PRIMITIVES: &[(&str, PrimitiveTy)] = &[
211 ("i8", PrimitiveTy::I8),
212 ("i16", PrimitiveTy::I16),
213 ("i32", PrimitiveTy::I32),
214 ("i64", PrimitiveTy::I64),
215 ("i128", PrimitiveTy::I128),
216 ("i256", PrimitiveTy::I256),
217 ("u8", PrimitiveTy::U8),
218 ("u16", PrimitiveTy::U16),
219 ("u32", PrimitiveTy::U32),
220 ("u64", PrimitiveTy::U64),
221 ("u128", PrimitiveTy::U128),
222 ("u256", PrimitiveTy::U256),
223 ("f16", PrimitiveTy::F16),
224 ("f32", PrimitiveTy::F32),
225 ("f64", PrimitiveTy::F64),
226 ("f128", PrimitiveTy::F128),
227 ("bool", PrimitiveTy::Bool),
228 ("ptr", PrimitiveTy::Ptr),
229];
230
231/// The Ty of a nominal struct.
232#[derive(Debug, Clone, PartialEq, Eq, Hash)]
233pub struct StructTy {
234 pub name: Ident,
235 pub fields: Vec<FieldTy>,
236 pub attrs: Vec<Attr>,
237 /// True if all fields had was_blank set, indicating this could be emitted as a tuple-struct
238 pub all_fields_were_blank: bool,
239}
240
241/// The Ty of an untagged union.
242///
243/// See [`TaggedTy`][] for a tagged union.
244#[derive(Debug, Clone, PartialEq, Eq, Hash)]
245pub struct UnionTy {
246 pub name: Ident,
247 pub fields: Vec<FieldTy>,
248 pub attrs: Vec<Attr>,
249}
250
251/// The Ty of an Enum.
252#[derive(Debug, Clone, PartialEq, Eq, Hash)]
253pub struct EnumTy {
254 pub name: Ident,
255 pub variants: Vec<EnumVariantTy>,
256 pub attrs: Vec<Attr>,
257}
258
259/// An enum variant
260#[derive(Debug, Clone, PartialEq, Eq, Hash)]
261pub struct EnumVariantTy {
262 pub name: Ident,
263 // pub val: LiteralExpr,
264}
265
266/// The Ty of a tagged union (rust-style enum).
267///
268/// See [`UnionTy`][] for an untagged union.
269///
270/// See [`EnumTy`][] for a c-style enum.
271#[derive(Debug, Clone, PartialEq, Eq, Hash)]
272pub struct TaggedTy {
273 pub name: Ident,
274 pub variants: Vec<TaggedVariantTy>,
275 pub attrs: Vec<Attr>,
276}
277
278/// A variant for a tagged union.
279#[derive(Debug, Clone, PartialEq, Eq, Hash)]
280pub struct TaggedVariantTy {
281 pub name: Ident,
282 pub fields: Option<Vec<FieldTy>>,
283 /// True if all fields have was_blank set, indicating this could be emitted as a tuple-variant
284 pub all_fields_were_blank: bool,
285}
286
287/// The Ty of a transparent type alias.
288///
289/// i.e. `type name = real` in rust
290///
291/// i.e. `typedef real name` in C++
292#[derive(Debug, Clone, PartialEq, Eq, Hash)]
293pub struct AliasTy {
294 pub name: Ident,
295 pub real: TyIdx,
296 pub attrs: Vec<Attr>,
297}
298
299/// A field of a [`StructTy`][] or [`UnionTy`][].
300#[derive(Debug, Clone, PartialEq, Eq, Hash)]
301pub struct FieldTy {
302 pub idx: usize,
303 pub ident: Ident,
304 pub ty: TyIdx,
305}
306
307/// The Ty of a fixed length array.
308#[derive(Debug, Clone, PartialEq, Eq, Hash)]
309pub struct ArrayTy {
310 pub elem_ty: TyIdx,
311 pub len: u64,
312}
313
314/// The Ty of a reference (transparent pointer).
315///
316/// This is used to represent passing a value by-reference, and so backends
317/// should consider the "value" to be the pointee. If you want to test that
318/// a pointer doesn't have its value corrupted but don't care about the pointee,
319/// use `PrimitiveTy::Ptr`.
320///
321/// When used in the `outputs` of a [`Func`], this expresses an out-param
322/// that the caller is responsible for "allocating" (and initializing?) and
323/// the callee is responsible for "writing" the value to it. The caller then
324/// checks the value just like other outputs.
325///
326/// Out-params should appear after "normal" inputs but before vararg inputs,
327/// with the name specified.
328#[derive(Debug, Clone, PartialEq, Eq, Hash)]
329pub struct RefTy {
330 pub pointee_ty: TyIdx,
331}
332
333/// The Ty of a Pun.
334///
335/// Puns express the fact that different languages might express a type
336/// in completely different ways but we expect the layout and/or ABI to
337/// match.
338///
339/// e.g. `Option<&T>` in Rust is equivalent to `T*` in C!
340///
341/// Resolve this with [`TypedProgram::resolve_pun`][].
342#[derive(Debug, Clone, PartialEq, Eq, Hash)]
343pub struct PunTy {
344 pub name: Ident,
345 pub blocks: Vec<PunBlockTy>,
346 pub attrs: Vec<Attr>,
347}
348
349/// A block for a [`PunTy`][]
350#[derive(Debug, Clone, PartialEq, Eq, Hash)]
351pub struct PunBlockTy {
352 pub selector: PunSelector,
353 pub real: TyIdx,
354}
355
356/// Information on all the types.
357///
358/// The key function of TyCtx is to `memoize` all parsed types (TyName) into
359/// type ids (TyIdx), to enable correct type comparison. Two types are equal
360/// *if and only if* they have the same TyIdx.
361///
362/// This is necessary because *nominal* types (TyName::Named, i.e. structs) can
363/// be messy due to shenanigans like captures/scoping/shadowing/inference. Types
364/// may refer to names that are out of scope, and two names that are equal
365/// (as strings) may not actually refer to the same type declaration.
366///
367/// To handle this, whenever a new named type is declared ([TyCtx::push_nominal_decl_incomplete][]),
368/// we generate a unique type id ([`TyIdx`][]) for it. Then whenever we encounter
369/// a reference to a Named type, we lookup the currently in scope TyIdx for that
370/// name, and use that instead. Named type scoping is managed by `envs`.
371///
372/// Replacing type names with type ids requires a change of representation,
373/// which is why we have [`Ty`][]. A Ty is the *structure* of a type with all types
374/// it refers to resolved to TyIdx's (e.g. a field of a tuple, the return type of a function).
375/// For convenience, non-typing metadata may also be stored in a Ty.
376///
377/// So a necessary intermediate step of converting an Ident to a TyIdx is to first
378/// convert it to a Ty. This intermediate value is stored in `tys`.
379/// If you have a TyIdx, you can get its Ty with [`realize_ty`][]. This lets you
380/// e.g. check if a value being called is actually a Func, and if it is,
381/// what the type ids of its arguments/return types are.
382///
383/// `ty_map` stores all the *structural* Tys we've seen before (everything that
384/// *isn't* TyName::Named), ensuring two structural types have the same TyIdx.
385/// i.e. `[u32; 4]` will have the same TyIdx everywhere it occurs.
386#[derive(Debug)]
387pub(crate) struct TyCtx {
388 /// The source code this is from, for resolving spans/errors.
389 src: Arc<NamedSource>,
390
391 /// The list of every known type.
392 ///
393 /// These are the "canonical" copies of each type. Types are
394 /// registered here via `memoize`, which returns a TyIdx into
395 /// this array.
396 ///
397 /// Types should be compared by checking if they have the same
398 /// TyIdx. This allows you to properly compare nominal types
399 /// in the face of shadowing and similar situations.
400 tys: Vec<Ty>,
401
402 /// Mappings from structural types we've seen to type indices.
403 ///
404 /// This is used to get the canonical TyIdx of a structural type
405 /// (including builtin primitives).
406 ///
407 /// Nominal types (structs) are stored in `envs`, because they
408 /// go in and out of scope.
409 ty_map: HashMap<Ty, TyIdx>,
410
411 ty_facts: HashMap<TyIdx, TypeFact>,
412
413 /// Scoped type info, reflecting the fact that struct definitions
414 /// and variables come in and out of scope.
415 ///
416 /// These values are "cumulative", so type names and variables
417 /// should be looked up by searching backwards in this array.
418 ///
419 /// If nothing is found, that type name / variable name is undefined
420 /// at this point in the program.
421 envs: Vec<CheckEnv>,
422}
423
424#[derive(Debug, Clone)]
425struct TypeFact {
426 contains_ref: bool,
427}
428
429/// Information about types for a specific scope.
430#[derive(Debug)]
431struct CheckEnv {
432 /// The struct definitions and TyIdx's
433 tys: HashMap<Ident, TyIdx>,
434}
435
436/// Take a ParsedProgram and produce a TypedProgram for it!
437pub fn typeck(comp: &mut Compiler, parsed: &ParsedProgram) -> Result<TypedProgram> {
438 let mut tcx = TyCtx {
439 src: comp.source.clone().unwrap(),
440 tys: vec![],
441 ty_map: HashMap::new(),
442 ty_facts: HashMap::new(),
443 envs: vec![],
444 };
445
446 // Add global builtins
447 tcx.envs.push(CheckEnv {
448 tys: HashMap::new(),
449 });
450 tcx.add_builtins();
451
452 // Put user-defined types in a separate scope just to be safe
453 tcx.envs.push(CheckEnv {
454 tys: HashMap::new(),
455 });
456
457 // Add all the user defined types
458 for (ty_name, _ty_decl) in &parsed.tys {
459 let _ty_idx = tcx.push_nominal_decl_incomplete(ty_name.clone());
460 }
461 for (ty_name, ty_decl) in &parsed.tys {
462 tcx.complete_nominal_decl(ty_name, ty_decl)?;
463 }
464
465 let funcs = parsed
466 .funcs
467 .iter()
468 .map(|(_func_name, func_decl)| -> Result<Func> {
469 let inputs = func_decl
470 .inputs
471 .iter()
472 .enumerate()
473 .map(|(idx, var)| -> Result<Arg> {
474 let name = ident_var(var.name.clone(), "arg", idx, &var.ty);
475 let ty = tcx.memoize_ty(&var.ty)?;
476 Ok(Arg { name, ty })
477 })
478 .collect::<Result<Vec<_>>>()?;
479 let outputs = func_decl
480 .outputs
481 .iter()
482 .enumerate()
483 .map(|(idx, var)| {
484 let name = ident_var(var.name.clone(), "out", idx, &var.ty);
485 let ty = tcx.memoize_ty(&var.ty)?;
486 Ok(Arg { name, ty })
487 })
488 .collect::<Result<Vec<_>>>()?;
489
490 let name = func_decl.name.clone();
491 let attrs = func_decl.attrs.clone();
492 Ok(Func {
493 name,
494 inputs,
495 outputs,
496 attrs,
497 body: (),
498 })
499 })
500 .collect::<Result<Vec<_>>>()?;
501
502 // Now that everything's added, compute some facts
503 tcx.compute_ty_facts()?;
504
505 let builtin_funcs_start = parsed.builtin_funcs_start;
506 Ok(TypedProgram {
507 tcx,
508 funcs,
509 builtin_funcs_start,
510 })
511}
512
513impl TyCtx {
514 /// Add the builtin types to the TyCtx
515 fn add_builtins(&mut self) {
516 let builtins = PRIMITIVES
517 .iter()
518 .map(|(name, prim)| (*name, Ty::Primitive(*prim)))
519 .chain(Some(("()", Ty::Empty)));
520
521 for (ty_name, ty) in builtins {
522 let ty_idx = self.tys.len();
523 self.tys.push(ty);
524 self.envs
525 .last_mut()
526 .unwrap()
527 .tys
528 .insert(Ident::from(ty_name.to_owned()), ty_idx);
529 }
530 }
531
532 /// Register a new nominal struct in this scope.
533 ///
534 /// This creates a valid TyIdx for the type, but the actual Ty
535 /// while be garbage (Ty::Empty arbitrarily) and needs to be
536 /// filled in properly with [`TyCtx::complete_nominal_decl`][].
537 ///
538 /// This two-phase system is necessary to allow nominal types to
539 /// be unordered or self-referential.
540 fn push_nominal_decl_incomplete(&mut self, ty_name: Ident) -> TyIdx {
541 let ty_idx = self.tys.len();
542 let dummy_ty = Ty::Empty;
543 self.tys.push(dummy_ty);
544 self.envs.last_mut().unwrap().tys.insert(ty_name, ty_idx);
545 ty_idx
546 }
547
548 /// Complete a nominal decl created with [`TyCtx::push_nominal_decl_incomplete`][].
549 fn complete_nominal_decl(&mut self, ty_name: &Ident, ty_decl: &TyDecl) -> Result<()> {
550 // This failing is an ICE and not a user issue!
551 let ty_idx = self
552 .resolve_nominal_ty(ty_name)
553 .expect("completing a nominal ty that hasn't been decl'd");
554 let ty = self.memoize_nominal_parts(ty_decl)?;
555 self.tys[ty_idx] = ty;
556 Ok(())
557 }
558
559 /// Memoize the parts of a nominal ty.
560 fn memoize_nominal_parts(&mut self, ty_decl: &TyDecl) -> Result<Ty> {
561 let ty = match ty_decl {
562 TyDecl::Struct(decl) => {
563 let fields = decl
564 .fields
565 .iter()
566 .enumerate()
567 .map(|(idx, f)| {
568 Ok(FieldTy {
569 idx,
570 ident: ident_var(f.name.clone(), "field", idx, &f.ty),
571 ty: self.memoize_ty(&f.ty)?,
572 })
573 })
574 .collect::<Result<Vec<_>>>()?;
575 let all_fields_were_blank = fields.iter().all(|f| f.ident.was_blank);
576 Ty::Struct(StructTy {
577 name: decl.name.clone(),
578 fields,
579 attrs: decl.attrs.clone(),
580 all_fields_were_blank,
581 })
582 }
583 TyDecl::Union(decl) => {
584 let fields = decl
585 .fields
586 .iter()
587 .enumerate()
588 .map(|(idx, f)| {
589 Ok(FieldTy {
590 idx,
591 ident: ident_var(f.name.clone(), "field", idx, &f.ty),
592 ty: self.memoize_ty(&f.ty)?,
593 })
594 })
595 .collect::<Result<Vec<_>>>()?;
596 Ty::Union(UnionTy {
597 name: decl.name.clone(),
598 fields,
599 attrs: decl.attrs.clone(),
600 })
601 }
602 TyDecl::Enum(decl) => {
603 let variants = decl
604 .variants
605 .iter()
606 .map(|v| EnumVariantTy {
607 name: v.name.clone(),
608 })
609 .collect::<Vec<_>>();
610 Ty::Enum(EnumTy {
611 name: decl.name.clone(),
612 variants,
613 attrs: decl.attrs.clone(),
614 })
615 }
616 TyDecl::Tagged(decl) => {
617 let variants = decl
618 .variants
619 .iter()
620 .map(|v| {
621 let fields = if let Some(fields) = &v.fields {
622 Some(
623 fields
624 .iter()
625 .enumerate()
626 .map(|(idx, f)| {
627 Ok(FieldTy {
628 idx,
629 ident: ident_var(f.name.clone(), "field", idx, &f.ty),
630 ty: self.memoize_ty(&f.ty)?,
631 })
632 })
633 .collect::<Result<Vec<_>>>()?,
634 )
635 } else {
636 None
637 };
638 let all_fields_were_blank = fields
639 .as_deref()
640 .unwrap_or_default()
641 .iter()
642 .all(|f| f.ident.was_blank);
643 Ok(TaggedVariantTy {
644 name: v.name.clone(),
645 fields,
646 all_fields_were_blank,
647 })
648 })
649 .collect::<Result<Vec<_>>>()?;
650 Ty::Tagged(TaggedTy {
651 name: decl.name.clone(),
652 variants,
653 attrs: decl.attrs.clone(),
654 })
655 }
656 TyDecl::Alias(decl) => {
657 let real_ty = self.memoize_ty(&decl.alias)?;
658 Ty::Alias(AliasTy {
659 name: decl.name.clone(),
660 real: real_ty,
661 attrs: decl.attrs.clone(),
662 })
663 }
664 TyDecl::Pun(decl) => {
665 let blocks = decl
666 .blocks
667 .iter()
668 .map(|block| {
669 // !!! If this ever becomes fallible we'll want a proper stack guard to pop!
670 self.envs.push(CheckEnv {
671 tys: HashMap::new(),
672 });
673 let real_decl = &block.decl;
674 let real = self.push_nominal_decl_incomplete(decl.name.clone());
675 self.complete_nominal_decl(&decl.name, real_decl)?;
676 self.envs.pop();
677
678 Ok(PunBlockTy {
679 selector: block.selector.clone(),
680 real,
681 })
682 })
683 .collect::<Result<Vec<_>>>()?;
684
685 Ty::Pun(PunTy {
686 name: decl.name.clone(),
687 blocks,
688 attrs: decl.attrs.clone(),
689 })
690 }
691 };
692 Ok(ty)
693 }
694
695 /// Resolve the type id (TyIdx) associated with a nominal type (struct name),
696 /// at this point in the program.
697 fn resolve_nominal_ty(&mut self, ty_name: &str) -> Option<TyIdx> {
698 for env in self.envs.iter_mut().rev() {
699 if let Some(ty) = env.tys.get(ty_name) {
700 return Some(*ty);
701 }
702 }
703 None
704 }
705
706 /// Converts a TyName (parsed type) into a TyIdx (type id).
707 ///
708 /// All TyNames in the program must be memoized, as this is the only reliable
709 /// way to do type comparisons. See the top level docs of TyIdx for details.
710 fn memoize_ty(&mut self, ty_ref: &Spanned<Tydent>) -> Result<TyIdx> {
711 let ty_idx = match &**ty_ref {
712 Tydent::Empty => self.memoize_inner(Ty::Empty),
713 Tydent::Ref(pointee_ty_ref) => {
714 let pointee_ty = self.memoize_ty(pointee_ty_ref)?;
715 self.memoize_inner(Ty::Ref(RefTy { pointee_ty }))
716 }
717 Tydent::Array(elem_ty_ref, len) => {
718 let elem_ty = self.memoize_ty(elem_ty_ref)?;
719 self.memoize_inner(Ty::Array(ArrayTy { elem_ty, len: *len }))
720 }
721 Tydent::Name(name) => {
722 // Nominal types take a separate path because they're scoped
723 if let Some(ty_idx) = self.resolve_nominal_ty(name) {
724 ty_idx
725 } else {
726 return Err(KdlScriptTypeError {
727 message: format!("use of undefined type name: {name}"),
728 src: self.src.clone(),
729 span: Spanned::span(name),
730 help: None,
731 })?;
732 }
733 }
734 };
735
736 Ok(ty_idx)
737 }
738
739 /// Converts a Ty (structural type with all subtypes resolved) into a TyIdx (type id).
740 fn memoize_inner(&mut self, ty: Ty) -> TyIdx {
741 if let Some(idx) = self.ty_map.get(&ty) {
742 *idx
743 } else {
744 let ty1 = ty.clone();
745 let ty2 = ty;
746 let idx = self.tys.len();
747
748 self.ty_map.insert(ty1, idx);
749 self.tys.push(ty2);
750 idx
751 }
752 }
753
754 /// Get the type-structure (Ty) associated with this type id (TyIdx).
755 pub fn realize_ty(&self, ty: TyIdx) -> &Ty {
756 self.tys
757 .get(ty)
758 .expect("Internal Compiler Error: invalid TyIdx")
759 }
760
761 /// Resolve a [`PunTy`][] based on the current [`PunEnv`][].
762 pub fn resolve_pun(&self, pun: &PunTy, env: &PunEnv) -> Result<TyIdx> {
763 for block in &pun.blocks {
764 if block.selector.matches(env) {
765 return Ok(block.real);
766 }
767 }
768
769 Err(KdlScriptTypeError {
770 message: "Failed to find applicable pun for this target environment".to_string(),
771 src: self.src.clone(),
772 span: Spanned::span(&pun.name),
773 help: Some(format!("Add another block that matches {:#?}", env)),
774 })?
775 }
776
777 fn compute_ty_facts(&mut self) -> Result<()> {
778 let mut to_compute = (0..self.tys.len()).collect::<Vec<TyIdx>>();
779 let mut already_visited = HashSet::new();
780
781 fn aggregate_facts(
782 this: &mut TyCtx,
783 to_compute: &mut Vec<TyIdx>,
784 already_visited: &mut HashSet<TyIdx>,
785 cur_ty: TyIdx,
786 child_tys: Vec<TyIdx>,
787 ) -> Result<Option<TypeFact>> {
788 let mut facts = TypeFact {
789 contains_ref: false,
790 };
791 let mut missing_info = vec![];
792 for child_ty in child_tys {
793 let Some(child_fact) = this.ty_facts.get(&child_ty) else {
794 missing_info.push(child_ty);
795 continue;
796 };
797 let TypeFact { contains_ref } = child_fact;
798 facts.contains_ref |= contains_ref;
799 }
800
801 // If everything resolved, great, we're done
802 if missing_info.is_empty() {
803 return Ok(Some(facts));
804 }
805
806 // Otherwise, restack ourself and the missing children, hopefully by the time
807 // the children resolve we'll have a definite answer.
808 //
809 // However if we *already* restacked ourselves, at this point assume there's
810 // an infinite type without the proper indirection of a reference!
811 if already_visited.contains(&cur_ty) {
812 Err(KdlScriptTypeError {
813 message: "This type is infinitely recursive without an indirection!".to_owned(),
814 src: this.src.clone(),
815 span: this.span_for_ty_decl(cur_ty),
816 help: Some("Add a reference (&) somewhere".to_owned()),
817 })?;
818 }
819 already_visited.insert(cur_ty);
820 to_compute.push(cur_ty);
821 to_compute.extend(missing_info);
822
823 Ok(None)
824 }
825
826 while let Some(ty_idx) = to_compute.pop() {
827 let facts = match self.realize_ty(ty_idx) {
828 Ty::Primitive(_) => Some(TypeFact {
829 contains_ref: false,
830 }),
831 Ty::Empty => Some(TypeFact {
832 contains_ref: false,
833 }),
834 Ty::Enum(_) => Some(TypeFact {
835 contains_ref: false,
836 }),
837 Ty::Ref(_) => Some(TypeFact { contains_ref: true }),
838
839 Ty::Alias(ty) => {
840 let child_tys = vec![ty.real];
841 aggregate_facts(
842 self,
843 &mut to_compute,
844 &mut already_visited,
845 ty_idx,
846 child_tys,
847 )?
848 }
849 Ty::Array(ty) => {
850 let child_tys = vec![ty.elem_ty];
851 aggregate_facts(
852 self,
853 &mut to_compute,
854 &mut already_visited,
855 ty_idx,
856 child_tys,
857 )?
858 }
859 Ty::Struct(ty) => {
860 let child_tys = ty.fields.iter().map(|f| f.ty).collect();
861 aggregate_facts(
862 self,
863 &mut to_compute,
864 &mut already_visited,
865 ty_idx,
866 child_tys,
867 )?
868 }
869 Ty::Union(ty) => {
870 let child_tys = ty.fields.iter().map(|f| f.ty).collect();
871 aggregate_facts(
872 self,
873 &mut to_compute,
874 &mut already_visited,
875 ty_idx,
876 child_tys,
877 )?
878 }
879 Ty::Tagged(ty) => {
880 let child_tys = ty
881 .variants
882 .iter()
883 .flat_map(|v| v.fields.as_deref().unwrap_or_default().iter().map(|f| f.ty))
884 .collect();
885 aggregate_facts(
886 self,
887 &mut to_compute,
888 &mut already_visited,
889 ty_idx,
890 child_tys,
891 )?
892 }
893 Ty::Pun(ty) => {
894 let child_tys = ty.blocks.iter().map(|f| f.real).collect();
895 aggregate_facts(
896 self,
897 &mut to_compute,
898 &mut already_visited,
899 ty_idx,
900 child_tys,
901 )?
902 }
903 };
904
905 if let Some(facts) = facts {
906 self.ty_facts.insert(ty_idx, facts);
907 }
908 }
909 Ok(())
910 }
911
912 /*
913 pub fn pointee_ty(&self, ty: TyIdx) -> TyIdx {
914 if let Ty::TypedPtr(pointee) = self.realize_ty(ty) {
915 *pointee
916 } else {
917 unreachable!("expected typed to be pointer");
918 }
919 }
920 */
921 pub fn span_for_ty_decl(&self, ty: TyIdx) -> SourceSpan {
922 match self.realize_ty(ty) {
923 Ty::Primitive(_) => SourceSpan::from(1..1),
924 Ty::Empty => SourceSpan::from(1..1),
925 Ty::Struct(ty) => Spanned::span(&ty.name),
926 Ty::Union(ty) => Spanned::span(&ty.name),
927 Ty::Enum(ty) => Spanned::span(&ty.name),
928 Ty::Tagged(ty) => Spanned::span(&ty.name),
929 Ty::Alias(ty) => Spanned::span(&ty.name),
930 Ty::Pun(ty) => Spanned::span(&ty.name),
931 Ty::Array(ty) => self.span_for_ty_decl(ty.elem_ty),
932 Ty::Ref(ty) => self.span_for_ty_decl(ty.pointee_ty),
933 }
934 }
935 /// Stringify a type.
936 pub fn format_ty(&self, ty: TyIdx) -> String {
937 match self.realize_ty(ty) {
938 Ty::Primitive(prim) => format!("{:?}", prim).to_lowercase(),
939 Ty::Empty => "()".to_string(),
940 Ty::Struct(decl) => format!("{}", decl.name),
941 Ty::Enum(decl) => format!("{}", decl.name),
942 Ty::Tagged(decl) => format!("{}", decl.name),
943 Ty::Union(decl) => format!("{}", decl.name),
944 Ty::Alias(decl) => format!("{}", decl.name),
945 Ty::Pun(decl) => format!("{}", decl.name),
946 Ty::Array(array_ty) => {
947 let inner = self.format_ty(array_ty.elem_ty);
948 format!("[{}; {}]", inner, array_ty.len)
949 }
950 Ty::Ref(ref_ty) => {
951 let inner = self.format_ty(ref_ty.pointee_ty);
952 format!("&{}", inner)
953 }
954 }
955 }
956}
957
958/// A node in the DefinitionGraph
959#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
960enum DefinitionGraphNode {
961 Func(FuncIdx),
962 Ty(TyIdx),
963}
964
965/// A Dependency Graph of all the type/function definitions.
966#[derive(Debug, Clone)]
967pub struct DefinitionGraph {
968 /// The actual Graph
969 graph: DiGraph<DefinitionGraphNode, ()>,
970 /// FuncIdx = NodeIdx
971 func_nodes: Vec<NodeIndex>,
972 /// The Strongly Connected Components in topological order
973 def_order: Vec<Vec<NodeIndex>>,
974}
975
976impl TypedProgram {
977 /// Get the Ty for this TyIdx
978 pub fn realize_ty(&self, ty: TyIdx) -> &Ty {
979 self.tcx.realize_ty(ty)
980 }
981
982 /// Get the Func for this FuncIdx
983 pub fn realize_func(&self, func: FuncIdx) -> &Func {
984 &self.funcs[func]
985 }
986
987 pub fn ty_contains_ref(&self, ty: TyIdx) -> bool {
988 self.tcx.ty_facts[&ty].contains_ref
989 }
990
991 pub fn all_funcs(&self) -> impl Iterator<Item = FuncIdx> {
992 0..self.builtin_funcs_start
993 }
994
995 /// Resolve a [`PunTy`][] based on the current [`PunEnv`][].
996 pub fn resolve_pun(&self, pun: &PunTy, env: &PunEnv) -> Result<TyIdx> {
997 self.tcx.resolve_pun(pun, env)
998 }
999
1000 /// Stringify a type (for debugging).
1001 pub fn format_ty(&self, ty: TyIdx) -> String {
1002 self.tcx.format_ty(ty)
1003 }
1004
1005 /// Compute the dependency graph between types ([`DefinitionGraph`][]).
1006 ///
1007 /// This serves two purposes:
1008 ///
1009 /// * Figuring out the correct order of type/function declarations (and forward declarations)
1010 /// for languages like C that need that kind of thing (and its prettier for other langs).
1011 ///
1012 /// * Producing minimized examples for subsets of the program (by only emitting the types
1013 /// needed for a single function).
1014 ///
1015 /// The next step in lowering the program is to query [`DefinitionGraph::definitions`][] with the
1016 /// functions you want to emit!
1017 ///
1018 /// This can fail if the given [`PunEnv`][] fails to resolve a [`PunTy`][].
1019 pub fn definition_graph(&self, env: &PunEnv) -> Result<DefinitionGraph> {
1020 let mut graph = petgraph::graph::DiGraph::new();
1021 let mut nodes = vec![];
1022
1023 // First create all the nodes for all the types
1024 for (ty_idx, _ty) in self.tcx.tys.iter().enumerate() {
1025 let ty_node = graph.add_node(DefinitionGraphNode::Ty(ty_idx));
1026 nodes.push(ty_node);
1027 }
1028
1029 // Now create edges between them for deps
1030 //
1031 // NOTE: it's fine to make an edge from a node to itself, that doesn't
1032 // change anything we do further down with SCCs!
1033 for (ty_idx, ty) in self.tcx.tys.iter().enumerate() {
1034 let ty_node = nodes[ty_idx];
1035 match ty {
1036 Ty::Struct(ty) => {
1037 for field in &ty.fields {
1038 let field_ty_node = nodes[field.ty];
1039 graph.update_edge(ty_node, field_ty_node, ());
1040 }
1041 }
1042 Ty::Union(ty) => {
1043 for field in &ty.fields {
1044 let field_ty_node = nodes[field.ty];
1045 graph.update_edge(ty_node, field_ty_node, ());
1046 }
1047 }
1048 Ty::Tagged(ty) => {
1049 for variant in &ty.variants {
1050 if let Some(fields) = &variant.fields {
1051 for field in fields {
1052 let field_ty_node = nodes[field.ty];
1053 graph.update_edge(ty_node, field_ty_node, ());
1054 }
1055 }
1056 }
1057 }
1058 Ty::Alias(ty) => {
1059 let real_ty_node = nodes[ty.real];
1060 graph.update_edge(ty_node, real_ty_node, ());
1061 }
1062 Ty::Pun(ty) => {
1063 let real_ty_node = nodes[self.tcx.resolve_pun(ty, env)?];
1064 graph.update_edge(ty_node, real_ty_node, ());
1065 }
1066 Ty::Array(ty) => {
1067 let elem_ty_node = nodes[ty.elem_ty];
1068 graph.update_edge(ty_node, elem_ty_node, ());
1069 }
1070 Ty::Ref(ty) => {
1071 let pointee_ty_node = nodes[ty.pointee_ty];
1072 graph.update_edge(ty_node, pointee_ty_node, ());
1073 }
1074 Ty::Enum(_) => {
1075 // Arguably this can't depend on any types...
1076 // BUT we should consider whether `@tag i32` is a dependency on i32!
1077 // These kinds of annotations aren't configured yet though!
1078 }
1079 Ty::Primitive(_) | Ty::Empty => {
1080 // These types have no deps, no edges to add!
1081 }
1082 }
1083 }
1084
1085 // Add edges from functions to the things they reference
1086 let mut func_nodes = vec![];
1087 for (func_idx, func) in self.funcs.iter().enumerate() {
1088 let func_node = graph.add_node(DefinitionGraphNode::Func(func_idx));
1089 for arg in func.inputs.iter().chain(func.outputs.iter()) {
1090 let arg_ty_node = nodes[arg.ty];
1091 graph.update_edge(func_node, arg_ty_node, ());
1092 }
1093 func_nodes.push(func_node);
1094 }
1095
1096 // Now compute the Strongly Connected Components!
1097 // See the comment in `DefinitionGraph::definitions` for details on what this is!
1098 let mut def_order = petgraph::algo::kosaraju_scc(&graph);
1099
1100 // Further refine the SCC order!
1101 //
1102 // We need all the nominal types to come first.
1103 // This is because the *name* `&MyType` still contains `MyType`,
1104 // so starting the forward declares with it would still refer to an
1105 // undeclared type. Thankfully no such issue exists for starting with
1106 // `MyType`, and all cycles always involve a nominal type!
1107 for component in &mut def_order {
1108 let mut sorted_nodes = std::mem::take(component);
1109 sorted_nodes.sort_by(|&lhs, &rhs| match (&graph[lhs], &graph[rhs]) {
1110 (DefinitionGraphNode::Func(l), DefinitionGraphNode::Func(r)) => l.cmp(r),
1111 (DefinitionGraphNode::Func(_), DefinitionGraphNode::Ty(_)) => Ordering::Less,
1112 (DefinitionGraphNode::Ty(_), DefinitionGraphNode::Func(_)) => Ordering::Greater,
1113 (DefinitionGraphNode::Ty(l), DefinitionGraphNode::Ty(r)) => {
1114 let lty = self.realize_ty(*l);
1115 let rty = self.realize_ty(*r);
1116 rty.is_nominal().cmp(<y.is_nominal()).then(l.cmp(r))
1117 }
1118 });
1119 *component = sorted_nodes;
1120 }
1121
1122 Ok(DefinitionGraph {
1123 graph,
1124 func_nodes,
1125 def_order,
1126 })
1127 }
1128}
1129
1130/// Kinds of definitions/declarations a backend should emit
1131pub enum Definition {
1132 /// Forward-declare this type
1133 DeclareTy(TyIdx),
1134 /// Define this type fully
1135 DefineTy(TyIdx),
1136 /// Forward-declare the function (only ever necessary with `feature="eval"`)
1137 /// otherwise functions are always roots and never need forward-declares.
1138 DeclareFunc(FuncIdx),
1139 /// Define this function fully
1140 DefineFunc(FuncIdx),
1141}
1142
1143impl DefinitionGraph {
1144 /// Get the exact list of forward-declares and definitions to emit the program!
1145 ///
1146 /// Note that the recommendations are *extremely* agnostic to the target language
1147 /// and will generally recommend you forward-declare or define a lot of types
1148 /// that need no such thing in basically every language.
1149 ///
1150 /// For instance with a definition like:
1151 ///
1152 /// ```kdl
1153 /// struct "SelfReferential" {
1154 /// me "Option<&SelfReferential>"
1155 /// val "u32"
1156 /// }
1157 /// ```
1158 ///
1159 /// (Generics aren't currently supported, this is just easier to express.)
1160 ///
1161 /// You will get recommended something like:
1162 ///
1163 /// 1. Define `u32`
1164 /// 2. Forward-declare `SelfReferential`
1165 /// 3. Forward-declare `&SelfReferential`
1166 /// 4. Define `Option<&SelfReferential>`
1167 /// 5. Define `SelfReferential`
1168 /// 6. Define `&SelfReferential`
1169 ///
1170 /// Which contains a lot of things that are nonsensical in basically every language!
1171 /// That's ok! Just ignore the nonsensical recommendations like "declare a primitive"
1172 /// or "forward-declare a reference" if they aren't necessary in your language!
1173 ///
1174 /// A Rust backend would only need to emit 5 (and maybe 4 if it's not Real Option).
1175 ///
1176 /// A C backend would only need to emit 2 and 5 (and maybe 4 if it's not Real Option).
1177 pub fn definitions(&self, funcs: impl IntoIterator<Item = FuncIdx>) -> Vec<Definition> {
1178 // Take the requested functions and compute all their dependencies!
1179 let mut reachable = std::collections::HashSet::new();
1180 petgraph::visit::depth_first_search(
1181 &self.graph,
1182 funcs.into_iter().map(|f| self.func_nodes[f]),
1183 |event| {
1184 if let petgraph::visit::DfsEvent::Discover(node, _) = event {
1185 reachable.insert(node);
1186 }
1187 },
1188 );
1189
1190 // Languages like C and C++ require types to be defined before they're used,
1191 // so we need to build a dependency graph between types and functions and
1192 // compute the topological sort. Unfortunately, types don't necessarily
1193 // form a DAG, so how do we do this!?
1194 //
1195 // An "SCC" algorithm gives us a topo-sort of our graph as a DAG,
1196 // but if there are any cycles then they get grouped together into one Mega Node
1197 // called a "Strongly Connected Component" (defined as "every node in an SCC can
1198 // reach every other node in the SCC"). This is why our toposort has elements that
1199 // are Vec<Node> instead of just Node. The order of elements within an SCC
1200 // is arbitrary because they're basically a dependency soup.
1201 //
1202 // If the graph is a proper DAG then every inner Vec will have one element. Otherwise
1203 // cycles will be "hidden" by cramming it into a Vec. In the limit everything will
1204 // get crammed into one inner Vec and that basically tells you everything is super
1205 // fucked.
1206 //
1207 // To unfuck an SCC, languages like C and C++ have "forward declarations" that let
1208 // you reserve a type name before actually specifying its definition. This breaks
1209 // the dependency cycle. Determining the optimal forward declarations is NP-Hard,
1210 // so we opt for the conservative solution of "emit a forward decl for everything
1211 // in the SCC except for 1 node", which is necessary and sufficient if the SCC
1212 // is the complete graph on N nodes.
1213 let mut output = vec![];
1214 for component in &self.def_order {
1215 // Get all the nodes in this SCC, and filter out the ones not reachable from
1216 // the functions we want to emit. (Filtering lets us emit minimal examples for
1217 // test failures so it's easy to reproduce/report!)
1218 let nodes = component.iter().rev().filter(|n| reachable.contains(*n));
1219
1220 // Emit forward decls for everything but the first (last) node
1221 // Note that this cutely does The Right thing (no forward decl)
1222 // for the "happy" case of an SCC of one node (proper DAG).
1223 for &node_idx in nodes.clone().skip(1) {
1224 let node = &self.graph[node_idx];
1225 match *node {
1226 DefinitionGraphNode::Func(func_idx) => {
1227 output.push(Definition::DeclareFunc(func_idx));
1228 }
1229 DefinitionGraphNode::Ty(ty_idx) => {
1230 output.push(Definition::DeclareTy(ty_idx));
1231 }
1232 }
1233 }
1234
1235 // Now that cycles have been broken with forward declares, we can
1236 // just emit everything in the SCC. Note that we emit the type we
1237 // *didn't* forward-declare first. All of its dependencies have
1238 // been resolved by the forward-declares, but it still needs to be
1239 // defined before anyone else in case they refer to it!
1240 for &node_idx in nodes {
1241 let node = &self.graph[node_idx];
1242 match *node {
1243 DefinitionGraphNode::Func(func_idx) => {
1244 output.push(Definition::DefineFunc(func_idx));
1245 }
1246 DefinitionGraphNode::Ty(ty_idx) => {
1247 output.push(Definition::DefineTy(ty_idx));
1248 }
1249 }
1250 }
1251 }
1252
1253 output
1254 }
1255}
1256
1257fn ident_var<T>(val: Option<Ident>, basename: &str, idx: usize, backup_span: &Spanned<T>) -> Ident {
1258 if let Some(val) = val {
1259 val
1260 } else {
1261 let val = format!("{basename}{idx}");
1262 let val = Spanned::new(val, Spanned::span(backup_span));
1263 Ident {
1264 was_blank: false,
1265 val,
1266 }
1267 }
1268}