kdl_script/
types.rs

1//! The type checker and types!
2//!
3//! The entry point is [`typeck`][] which is implicitly
4//! handled by [`Compiler::compile_path`][] or [`Compiler::compile_string`][]
5//! and will produce a [`TypedProgram`][].
6//!
7//! You should then call [`TypedProgram::definition_graph`][] with your
8//! target backend's [`PunEnv`][] to resolve all the [`PunTy`]s and get a
9//! final [`DefinitionGraph`][].
10//!
11//! You should then call [`DefinitionGraph::definitions`][] with the set
12//! of functions you want to emit (usually [`TypedProgram::all_funcs`][])
13//! to get the final forward-decls and definitions your target should emit
14//! to generate its program.
15//!
16//! If a test (function) fails, you can pass just that function to
17//! [`DefinitionGraph::definitions`][] to get a minimized program for just
18//! that one function.
19//!
20//! The type system is phased like this to allow work to be reused and shared
21//! where possible. Each of the above "lowerings" represents increased levels
22//! of specificity:
23//!
24//! * [`TypedProgram`][] is abstract over all possible backends and can be computed once.
25//! * [`DefinitionGraph`][] is for a concrete backend but still abstract over what parts
26//!   of the program you might care about emitting. Computed once per backend config ([`PunEnv`]).
27//! * [`DefinitionGraph::definitions`][] is the final concrete program we want to emit.
28//!
29//! In principle a backend emitting various configs for a single [`TypedProgram`][] can
30//! share everything for a specific [`TyIdx`][] or [`FuncIdx`][], except they need to be
31//! careful about [`PunTy`][]s which can have [`DefinitionGraph`][]-specific lowerings...
32//! so really you should only recycle state created for a specific [`DefinitionGraph`]!
33//!
34//! FIXME: unlike [`AliasTy`][]s, [`PunTy`][]s really *should* completely evaporate in the
35//! backend's lowering. Perhaps we should do something in [`TypedProgram`][] to actually
36//! make them transparent?
37//!
38//! While performance isn't a huge concern for this project, combinatorics do get
39//! kind of out of control so work sharing is kinda important, especially as the backends
40//! get more complex! Also it's just nice to handle backend-agnostic issues once to keep
41//! things simple and correct.
42
43use std::cmp::Ordering;
44use std::collections::HashMap;
45use std::collections::HashSet;
46use std::sync::Arc;
47
48use miette::{Diagnostic, NamedSource, SourceSpan};
49use petgraph::graph::DiGraph;
50use petgraph::graph::NodeIndex;
51use thiserror::Error;
52
53use crate::parse::*;
54use crate::spanned::*;
55use crate::Compiler;
56use crate::Result;
57
58/// An error that occured while processing the types of a program.
59#[derive(Debug, Error, Diagnostic)]
60#[error("{message}")]
61pub struct KdlScriptTypeError {
62    pub message: String,
63    #[source_code]
64    pub src: Arc<NamedSource>,
65    #[label]
66    pub span: SourceSpan,
67    #[diagnostic(help)]
68    pub help: Option<String>,
69}
70
71/// A program that has had its symbolic types resolved to actual type ids.
72///
73/// Aliases and Puns are not fully resolved at this point.
74///
75/// Aliases still exist so that you can emit the target language's form of
76/// an alias if you want to most accurately express the input program.
77///
78/// Puns still exist because a TypedProgram is abstract over every possible
79/// output language to share the workload between each concrete backend.
80/// The next step in lowering the program is to ask it to resolve
81/// the puns for a specific [`crate::PunEnv`][] with [`TypedProgram::definition_graph`].
82/// Which will also handle computing the order of declarations for languages like C.
83#[derive(Debug)]
84pub struct TypedProgram {
85    tcx: TyCtx,
86    funcs: Vec<Func>,
87    builtin_funcs_start: usize,
88}
89
90/// A type id
91pub type TyIdx = usize;
92/// A function id
93pub type FuncIdx = usize;
94
95/// The actual structure of a type
96///
97/// This may be either a nominal, structural, or primitive type.
98///
99/// Any types that this type references will already have been normalized to a [`TyIdx`][]
100/// so you don't have to worry about name resolution or interning/memoizing. Notably
101/// all uses of `[u32; 5]` will have the same [`TyIdx`][], although `[MyU32Alias; 5]` will
102/// be get a separate type id to allow a backend to more accurately reproduce the input program.
103#[derive(Debug, Clone, PartialEq, Eq, Hash)]
104pub enum Ty {
105    /// A primitive (int, float, bool, ptr)
106    Primitive(PrimitiveTy),
107    /// A nominal struct
108    Struct(StructTy),
109    /// A nominal untagged union
110    Union(UnionTy),
111    /// A nominal C-style enum (see `Tagged` for a full rust-style enum)
112    Enum(EnumTy),
113    /// A nominal tagged union (rust-style enum, see `Enum` for a c-style enum)
114    Tagged(TaggedTy),
115    /// A transparent type alias (like typed)
116    Alias(AliasTy),
117    /// A type pun that can have different underlying types for different targets
118    Pun(PunTy),
119    /// A fixed-length array
120    Array(ArrayTy),
121    /// A reference to a type (behaves as if is the Pointee, but just passed by-ref)
122    Ref(RefTy),
123    /// Empty tuple -- `()`
124    Empty,
125}
126
127impl Ty {
128    pub fn is_nominal(&self) -> bool {
129        match self {
130            Ty::Primitive(_) => false,
131            Ty::Struct(_) => true,
132            Ty::Union(_) => true,
133            Ty::Enum(_) => true,
134            Ty::Tagged(_) => true,
135            Ty::Alias(_) => true,
136            Ty::Pun(_) => true,
137            Ty::Array(_) => false,
138            Ty::Ref(_) => false,
139            Ty::Empty => false,
140        }
141    }
142}
143
144/// A function
145#[derive(Debug, Clone)]
146pub struct Func {
147    /// The function's name
148    pub name: Ident,
149    /// The function's inputs
150    pub inputs: Vec<Arg>,
151    /// The function's outputs (note that outparams will appear as Ty::Ref outputs!)
152    pub outputs: Vec<Arg>,
153    /// Any attributes hanging off the function
154    pub attrs: Vec<Attr>,
155    #[cfg(feature = "eval")]
156    /// The body of the function (TBD, not needed for abi-cafe)
157    pub body: (),
158}
159
160/// A function argument (input or output).
161#[derive(Debug, Clone)]
162pub struct Arg {
163    /// The name of the argument
164    pub name: Ident,
165    /// The type of the arg
166    pub ty: TyIdx,
167}
168
169/// A primitive
170#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
171pub enum PrimitiveTy {
172    /// `i8` / `int8_t`
173    I8,
174    /// `i16` / `int16_t`
175    I16,
176    /// `i32` / `int32_t`
177    I32,
178    /// `i64` / `int64_t`
179    I64,
180    /// `i128` / `int128_t`
181    I128,
182    /// `i256` / `int256_t`
183    I256,
184    /// `u8` / `uint8_t`
185    U8,
186    /// `u16` / `uint16_t`
187    U16,
188    /// `u32` / `uint32_t`
189    U32,
190    /// `u64` / `uint64_t`
191    U64,
192    /// `u128` / `uint128_t`
193    U128,
194    /// `u256` / `uint256_t`
195    U256,
196    /// `f16` / `half`
197    F16,
198    /// `f32` / `float`
199    F32,
200    /// `f64` / `double`
201    F64,
202    /// `f128` / `quad`
203    F128,
204    /// `bool`
205    Bool,
206    /// An opaque pointer (like `void*`)
207    Ptr,
208}
209
210pub const PRIMITIVES: &[(&str, PrimitiveTy)] = &[
211    ("i8", PrimitiveTy::I8),
212    ("i16", PrimitiveTy::I16),
213    ("i32", PrimitiveTy::I32),
214    ("i64", PrimitiveTy::I64),
215    ("i128", PrimitiveTy::I128),
216    ("i256", PrimitiveTy::I256),
217    ("u8", PrimitiveTy::U8),
218    ("u16", PrimitiveTy::U16),
219    ("u32", PrimitiveTy::U32),
220    ("u64", PrimitiveTy::U64),
221    ("u128", PrimitiveTy::U128),
222    ("u256", PrimitiveTy::U256),
223    ("f16", PrimitiveTy::F16),
224    ("f32", PrimitiveTy::F32),
225    ("f64", PrimitiveTy::F64),
226    ("f128", PrimitiveTy::F128),
227    ("bool", PrimitiveTy::Bool),
228    ("ptr", PrimitiveTy::Ptr),
229];
230
231/// The Ty of a nominal struct.
232#[derive(Debug, Clone, PartialEq, Eq, Hash)]
233pub struct StructTy {
234    pub name: Ident,
235    pub fields: Vec<FieldTy>,
236    pub attrs: Vec<Attr>,
237    /// True if all fields had was_blank set, indicating this could be emitted as a tuple-struct
238    pub all_fields_were_blank: bool,
239}
240
241/// The Ty of an untagged union.
242///
243/// See [`TaggedTy`][] for a tagged union.
244#[derive(Debug, Clone, PartialEq, Eq, Hash)]
245pub struct UnionTy {
246    pub name: Ident,
247    pub fields: Vec<FieldTy>,
248    pub attrs: Vec<Attr>,
249}
250
251/// The Ty of an Enum.
252#[derive(Debug, Clone, PartialEq, Eq, Hash)]
253pub struct EnumTy {
254    pub name: Ident,
255    pub variants: Vec<EnumVariantTy>,
256    pub attrs: Vec<Attr>,
257}
258
259/// An enum variant
260#[derive(Debug, Clone, PartialEq, Eq, Hash)]
261pub struct EnumVariantTy {
262    pub name: Ident,
263    // pub val: LiteralExpr,
264}
265
266/// The Ty of a tagged union (rust-style enum).
267///
268/// See [`UnionTy`][] for an untagged union.
269///
270/// See [`EnumTy`][] for a c-style enum.
271#[derive(Debug, Clone, PartialEq, Eq, Hash)]
272pub struct TaggedTy {
273    pub name: Ident,
274    pub variants: Vec<TaggedVariantTy>,
275    pub attrs: Vec<Attr>,
276}
277
278/// A variant for a tagged union.
279#[derive(Debug, Clone, PartialEq, Eq, Hash)]
280pub struct TaggedVariantTy {
281    pub name: Ident,
282    pub fields: Option<Vec<FieldTy>>,
283    /// True if all fields have was_blank set, indicating this could be emitted as a tuple-variant
284    pub all_fields_were_blank: bool,
285}
286
287/// The Ty of a transparent type alias.
288///
289/// i.e. `type name = real` in rust
290///
291/// i.e. `typedef real name` in C++
292#[derive(Debug, Clone, PartialEq, Eq, Hash)]
293pub struct AliasTy {
294    pub name: Ident,
295    pub real: TyIdx,
296    pub attrs: Vec<Attr>,
297}
298
299/// A field of a [`StructTy`][] or [`UnionTy`][].
300#[derive(Debug, Clone, PartialEq, Eq, Hash)]
301pub struct FieldTy {
302    pub idx: usize,
303    pub ident: Ident,
304    pub ty: TyIdx,
305}
306
307/// The Ty of a fixed length array.
308#[derive(Debug, Clone, PartialEq, Eq, Hash)]
309pub struct ArrayTy {
310    pub elem_ty: TyIdx,
311    pub len: u64,
312}
313
314/// The Ty of a reference (transparent pointer).
315///
316/// This is used to represent passing a value by-reference, and so backends
317/// should consider the "value" to be the pointee. If you want to test that
318/// a pointer doesn't have its value corrupted but don't care about the pointee,
319/// use `PrimitiveTy::Ptr`.
320///
321/// When used in the `outputs` of a [`Func`], this expresses an out-param
322/// that the caller is responsible for "allocating" (and initializing?) and
323/// the callee is responsible for "writing" the value to it. The caller then
324/// checks the value just like other outputs.
325///
326/// Out-params should appear after "normal" inputs but before vararg inputs,
327/// with the name specified.
328#[derive(Debug, Clone, PartialEq, Eq, Hash)]
329pub struct RefTy {
330    pub pointee_ty: TyIdx,
331}
332
333/// The Ty of a Pun.
334///
335/// Puns express the fact that different languages might express a type
336/// in completely different ways but we expect the layout and/or ABI to
337/// match.
338///
339/// e.g. `Option<&T>` in Rust is equivalent to `T*` in C!
340///
341/// Resolve this with [`TypedProgram::resolve_pun`][].
342#[derive(Debug, Clone, PartialEq, Eq, Hash)]
343pub struct PunTy {
344    pub name: Ident,
345    pub blocks: Vec<PunBlockTy>,
346    pub attrs: Vec<Attr>,
347}
348
349/// A block for a [`PunTy`][]
350#[derive(Debug, Clone, PartialEq, Eq, Hash)]
351pub struct PunBlockTy {
352    pub selector: PunSelector,
353    pub real: TyIdx,
354}
355
356/// Information on all the types.
357///
358/// The key function of TyCtx is to `memoize` all parsed types (TyName) into
359/// type ids (TyIdx), to enable correct type comparison. Two types are equal
360/// *if and only if* they have the same TyIdx.
361///
362/// This is necessary because *nominal* types (TyName::Named, i.e. structs) can
363/// be messy due to shenanigans like captures/scoping/shadowing/inference. Types
364/// may refer to names that are out of scope, and two names that are equal
365/// (as strings) may not actually refer to the same type declaration.
366///
367/// To handle this, whenever a new named type is declared ([TyCtx::push_nominal_decl_incomplete][]),
368/// we generate a unique type id ([`TyIdx`][]) for it. Then whenever we encounter
369/// a reference to a Named type, we lookup the currently in scope TyIdx for that
370/// name, and use that instead. Named type scoping is managed by `envs`.
371///
372/// Replacing type names with type ids requires a change of representation,
373/// which is why we have [`Ty`][]. A Ty is the *structure* of a type with all types
374/// it refers to resolved to TyIdx's (e.g. a field of a tuple, the return type of a function).
375/// For convenience, non-typing metadata may also be stored in a Ty.
376///
377/// So a necessary intermediate step of converting an Ident to a TyIdx is to first
378/// convert it to a Ty. This intermediate value is stored in `tys`.
379/// If you have a TyIdx, you can get its Ty with [`realize_ty`][]. This lets you
380/// e.g. check if a value being called is actually a Func, and if it is,
381/// what the type ids of its arguments/return types are.
382///
383/// `ty_map` stores all the *structural* Tys we've seen before (everything that
384/// *isn't* TyName::Named), ensuring two structural types have the same TyIdx.
385/// i.e. `[u32; 4]` will have the same TyIdx everywhere it occurs.
386#[derive(Debug)]
387pub(crate) struct TyCtx {
388    /// The source code this is from, for resolving spans/errors.
389    src: Arc<NamedSource>,
390
391    /// The list of every known type.
392    ///
393    /// These are the "canonical" copies of each type. Types are
394    /// registered here via `memoize`, which returns a TyIdx into
395    /// this array.
396    ///
397    /// Types should be compared by checking if they have the same
398    /// TyIdx. This allows you to properly compare nominal types
399    /// in the face of shadowing and similar situations.
400    tys: Vec<Ty>,
401
402    /// Mappings from structural types we've seen to type indices.
403    ///
404    /// This is used to get the canonical TyIdx of a structural type
405    /// (including builtin primitives).
406    ///
407    /// Nominal types (structs) are stored in `envs`, because they
408    /// go in and out of scope.
409    ty_map: HashMap<Ty, TyIdx>,
410
411    ty_facts: HashMap<TyIdx, TypeFact>,
412
413    /// Scoped type info, reflecting the fact that struct definitions
414    /// and variables come in and out of scope.
415    ///
416    /// These values are "cumulative", so type names and variables
417    /// should be looked up by searching backwards in this array.
418    ///
419    /// If nothing is found, that type name / variable name is undefined
420    /// at this point in the program.
421    envs: Vec<CheckEnv>,
422}
423
424#[derive(Debug, Clone)]
425struct TypeFact {
426    contains_ref: bool,
427}
428
429/// Information about types for a specific scope.
430#[derive(Debug)]
431struct CheckEnv {
432    /// The struct definitions and TyIdx's
433    tys: HashMap<Ident, TyIdx>,
434}
435
436/// Take a ParsedProgram and produce a TypedProgram for it!
437pub fn typeck(comp: &mut Compiler, parsed: &ParsedProgram) -> Result<TypedProgram> {
438    let mut tcx = TyCtx {
439        src: comp.source.clone().unwrap(),
440        tys: vec![],
441        ty_map: HashMap::new(),
442        ty_facts: HashMap::new(),
443        envs: vec![],
444    };
445
446    // Add global builtins
447    tcx.envs.push(CheckEnv {
448        tys: HashMap::new(),
449    });
450    tcx.add_builtins();
451
452    // Put user-defined types in a separate scope just to be safe
453    tcx.envs.push(CheckEnv {
454        tys: HashMap::new(),
455    });
456
457    // Add all the user defined types
458    for (ty_name, _ty_decl) in &parsed.tys {
459        let _ty_idx = tcx.push_nominal_decl_incomplete(ty_name.clone());
460    }
461    for (ty_name, ty_decl) in &parsed.tys {
462        tcx.complete_nominal_decl(ty_name, ty_decl)?;
463    }
464
465    let funcs = parsed
466        .funcs
467        .iter()
468        .map(|(_func_name, func_decl)| -> Result<Func> {
469            let inputs = func_decl
470                .inputs
471                .iter()
472                .enumerate()
473                .map(|(idx, var)| -> Result<Arg> {
474                    let name = ident_var(var.name.clone(), "arg", idx, &var.ty);
475                    let ty = tcx.memoize_ty(&var.ty)?;
476                    Ok(Arg { name, ty })
477                })
478                .collect::<Result<Vec<_>>>()?;
479            let outputs = func_decl
480                .outputs
481                .iter()
482                .enumerate()
483                .map(|(idx, var)| {
484                    let name = ident_var(var.name.clone(), "out", idx, &var.ty);
485                    let ty = tcx.memoize_ty(&var.ty)?;
486                    Ok(Arg { name, ty })
487                })
488                .collect::<Result<Vec<_>>>()?;
489
490            let name = func_decl.name.clone();
491            let attrs = func_decl.attrs.clone();
492            Ok(Func {
493                name,
494                inputs,
495                outputs,
496                attrs,
497                body: (),
498            })
499        })
500        .collect::<Result<Vec<_>>>()?;
501
502    // Now that everything's added, compute some facts
503    tcx.compute_ty_facts()?;
504
505    let builtin_funcs_start = parsed.builtin_funcs_start;
506    Ok(TypedProgram {
507        tcx,
508        funcs,
509        builtin_funcs_start,
510    })
511}
512
513impl TyCtx {
514    /// Add the builtin types to the TyCtx
515    fn add_builtins(&mut self) {
516        let builtins = PRIMITIVES
517            .iter()
518            .map(|(name, prim)| (*name, Ty::Primitive(*prim)))
519            .chain(Some(("()", Ty::Empty)));
520
521        for (ty_name, ty) in builtins {
522            let ty_idx = self.tys.len();
523            self.tys.push(ty);
524            self.envs
525                .last_mut()
526                .unwrap()
527                .tys
528                .insert(Ident::from(ty_name.to_owned()), ty_idx);
529        }
530    }
531
532    /// Register a new nominal struct in this scope.
533    ///
534    /// This creates a valid TyIdx for the type, but the actual Ty
535    /// while be garbage (Ty::Empty arbitrarily) and needs to be
536    /// filled in properly with [`TyCtx::complete_nominal_decl`][].
537    ///
538    /// This two-phase system is necessary to allow nominal types to
539    /// be unordered or self-referential.
540    fn push_nominal_decl_incomplete(&mut self, ty_name: Ident) -> TyIdx {
541        let ty_idx = self.tys.len();
542        let dummy_ty = Ty::Empty;
543        self.tys.push(dummy_ty);
544        self.envs.last_mut().unwrap().tys.insert(ty_name, ty_idx);
545        ty_idx
546    }
547
548    /// Complete a nominal decl created with [`TyCtx::push_nominal_decl_incomplete`][].
549    fn complete_nominal_decl(&mut self, ty_name: &Ident, ty_decl: &TyDecl) -> Result<()> {
550        // This failing is an ICE and not a user issue!
551        let ty_idx = self
552            .resolve_nominal_ty(ty_name)
553            .expect("completing a nominal ty that hasn't been decl'd");
554        let ty = self.memoize_nominal_parts(ty_decl)?;
555        self.tys[ty_idx] = ty;
556        Ok(())
557    }
558
559    /// Memoize the parts of a nominal ty.
560    fn memoize_nominal_parts(&mut self, ty_decl: &TyDecl) -> Result<Ty> {
561        let ty = match ty_decl {
562            TyDecl::Struct(decl) => {
563                let fields = decl
564                    .fields
565                    .iter()
566                    .enumerate()
567                    .map(|(idx, f)| {
568                        Ok(FieldTy {
569                            idx,
570                            ident: ident_var(f.name.clone(), "field", idx, &f.ty),
571                            ty: self.memoize_ty(&f.ty)?,
572                        })
573                    })
574                    .collect::<Result<Vec<_>>>()?;
575                let all_fields_were_blank = fields.iter().all(|f| f.ident.was_blank);
576                Ty::Struct(StructTy {
577                    name: decl.name.clone(),
578                    fields,
579                    attrs: decl.attrs.clone(),
580                    all_fields_were_blank,
581                })
582            }
583            TyDecl::Union(decl) => {
584                let fields = decl
585                    .fields
586                    .iter()
587                    .enumerate()
588                    .map(|(idx, f)| {
589                        Ok(FieldTy {
590                            idx,
591                            ident: ident_var(f.name.clone(), "field", idx, &f.ty),
592                            ty: self.memoize_ty(&f.ty)?,
593                        })
594                    })
595                    .collect::<Result<Vec<_>>>()?;
596                Ty::Union(UnionTy {
597                    name: decl.name.clone(),
598                    fields,
599                    attrs: decl.attrs.clone(),
600                })
601            }
602            TyDecl::Enum(decl) => {
603                let variants = decl
604                    .variants
605                    .iter()
606                    .map(|v| EnumVariantTy {
607                        name: v.name.clone(),
608                    })
609                    .collect::<Vec<_>>();
610                Ty::Enum(EnumTy {
611                    name: decl.name.clone(),
612                    variants,
613                    attrs: decl.attrs.clone(),
614                })
615            }
616            TyDecl::Tagged(decl) => {
617                let variants = decl
618                    .variants
619                    .iter()
620                    .map(|v| {
621                        let fields = if let Some(fields) = &v.fields {
622                            Some(
623                                fields
624                                    .iter()
625                                    .enumerate()
626                                    .map(|(idx, f)| {
627                                        Ok(FieldTy {
628                                            idx,
629                                            ident: ident_var(f.name.clone(), "field", idx, &f.ty),
630                                            ty: self.memoize_ty(&f.ty)?,
631                                        })
632                                    })
633                                    .collect::<Result<Vec<_>>>()?,
634                            )
635                        } else {
636                            None
637                        };
638                        let all_fields_were_blank = fields
639                            .as_deref()
640                            .unwrap_or_default()
641                            .iter()
642                            .all(|f| f.ident.was_blank);
643                        Ok(TaggedVariantTy {
644                            name: v.name.clone(),
645                            fields,
646                            all_fields_were_blank,
647                        })
648                    })
649                    .collect::<Result<Vec<_>>>()?;
650                Ty::Tagged(TaggedTy {
651                    name: decl.name.clone(),
652                    variants,
653                    attrs: decl.attrs.clone(),
654                })
655            }
656            TyDecl::Alias(decl) => {
657                let real_ty = self.memoize_ty(&decl.alias)?;
658                Ty::Alias(AliasTy {
659                    name: decl.name.clone(),
660                    real: real_ty,
661                    attrs: decl.attrs.clone(),
662                })
663            }
664            TyDecl::Pun(decl) => {
665                let blocks = decl
666                    .blocks
667                    .iter()
668                    .map(|block| {
669                        // !!! If this ever becomes fallible we'll want a proper stack guard to pop!
670                        self.envs.push(CheckEnv {
671                            tys: HashMap::new(),
672                        });
673                        let real_decl = &block.decl;
674                        let real = self.push_nominal_decl_incomplete(decl.name.clone());
675                        self.complete_nominal_decl(&decl.name, real_decl)?;
676                        self.envs.pop();
677
678                        Ok(PunBlockTy {
679                            selector: block.selector.clone(),
680                            real,
681                        })
682                    })
683                    .collect::<Result<Vec<_>>>()?;
684
685                Ty::Pun(PunTy {
686                    name: decl.name.clone(),
687                    blocks,
688                    attrs: decl.attrs.clone(),
689                })
690            }
691        };
692        Ok(ty)
693    }
694
695    /// Resolve the type id (TyIdx) associated with a nominal type (struct name),
696    /// at this point in the program.
697    fn resolve_nominal_ty(&mut self, ty_name: &str) -> Option<TyIdx> {
698        for env in self.envs.iter_mut().rev() {
699            if let Some(ty) = env.tys.get(ty_name) {
700                return Some(*ty);
701            }
702        }
703        None
704    }
705
706    /// Converts a TyName (parsed type) into a TyIdx (type id).
707    ///
708    /// All TyNames in the program must be memoized, as this is the only reliable
709    /// way to do type comparisons. See the top level docs of TyIdx for details.
710    fn memoize_ty(&mut self, ty_ref: &Spanned<Tydent>) -> Result<TyIdx> {
711        let ty_idx = match &**ty_ref {
712            Tydent::Empty => self.memoize_inner(Ty::Empty),
713            Tydent::Ref(pointee_ty_ref) => {
714                let pointee_ty = self.memoize_ty(pointee_ty_ref)?;
715                self.memoize_inner(Ty::Ref(RefTy { pointee_ty }))
716            }
717            Tydent::Array(elem_ty_ref, len) => {
718                let elem_ty = self.memoize_ty(elem_ty_ref)?;
719                self.memoize_inner(Ty::Array(ArrayTy { elem_ty, len: *len }))
720            }
721            Tydent::Name(name) => {
722                // Nominal types take a separate path because they're scoped
723                if let Some(ty_idx) = self.resolve_nominal_ty(name) {
724                    ty_idx
725                } else {
726                    return Err(KdlScriptTypeError {
727                        message: format!("use of undefined type name: {name}"),
728                        src: self.src.clone(),
729                        span: Spanned::span(name),
730                        help: None,
731                    })?;
732                }
733            }
734        };
735
736        Ok(ty_idx)
737    }
738
739    /// Converts a Ty (structural type with all subtypes resolved) into a TyIdx (type id).
740    fn memoize_inner(&mut self, ty: Ty) -> TyIdx {
741        if let Some(idx) = self.ty_map.get(&ty) {
742            *idx
743        } else {
744            let ty1 = ty.clone();
745            let ty2 = ty;
746            let idx = self.tys.len();
747
748            self.ty_map.insert(ty1, idx);
749            self.tys.push(ty2);
750            idx
751        }
752    }
753
754    /// Get the type-structure (Ty) associated with this type id (TyIdx).
755    pub fn realize_ty(&self, ty: TyIdx) -> &Ty {
756        self.tys
757            .get(ty)
758            .expect("Internal Compiler Error: invalid TyIdx")
759    }
760
761    /// Resolve a [`PunTy`][] based on the current [`PunEnv`][].
762    pub fn resolve_pun(&self, pun: &PunTy, env: &PunEnv) -> Result<TyIdx> {
763        for block in &pun.blocks {
764            if block.selector.matches(env) {
765                return Ok(block.real);
766            }
767        }
768
769        Err(KdlScriptTypeError {
770            message: "Failed to find applicable pun for this target environment".to_string(),
771            src: self.src.clone(),
772            span: Spanned::span(&pun.name),
773            help: Some(format!("Add another block that matches {:#?}", env)),
774        })?
775    }
776
777    fn compute_ty_facts(&mut self) -> Result<()> {
778        let mut to_compute = (0..self.tys.len()).collect::<Vec<TyIdx>>();
779        let mut already_visited = HashSet::new();
780
781        fn aggregate_facts(
782            this: &mut TyCtx,
783            to_compute: &mut Vec<TyIdx>,
784            already_visited: &mut HashSet<TyIdx>,
785            cur_ty: TyIdx,
786            child_tys: Vec<TyIdx>,
787        ) -> Result<Option<TypeFact>> {
788            let mut facts = TypeFact {
789                contains_ref: false,
790            };
791            let mut missing_info = vec![];
792            for child_ty in child_tys {
793                let Some(child_fact) = this.ty_facts.get(&child_ty) else {
794                    missing_info.push(child_ty);
795                    continue;
796                };
797                let TypeFact { contains_ref } = child_fact;
798                facts.contains_ref |= contains_ref;
799            }
800
801            // If everything resolved, great, we're done
802            if missing_info.is_empty() {
803                return Ok(Some(facts));
804            }
805
806            // Otherwise, restack ourself and the missing children, hopefully by the time
807            // the children resolve we'll have a definite answer.
808            //
809            // However if we *already* restacked ourselves, at this point assume there's
810            // an infinite type without the proper indirection of a reference!
811            if already_visited.contains(&cur_ty) {
812                Err(KdlScriptTypeError {
813                    message: "This type is infinitely recursive without an indirection!".to_owned(),
814                    src: this.src.clone(),
815                    span: this.span_for_ty_decl(cur_ty),
816                    help: Some("Add a reference (&) somewhere".to_owned()),
817                })?;
818            }
819            already_visited.insert(cur_ty);
820            to_compute.push(cur_ty);
821            to_compute.extend(missing_info);
822
823            Ok(None)
824        }
825
826        while let Some(ty_idx) = to_compute.pop() {
827            let facts = match self.realize_ty(ty_idx) {
828                Ty::Primitive(_) => Some(TypeFact {
829                    contains_ref: false,
830                }),
831                Ty::Empty => Some(TypeFact {
832                    contains_ref: false,
833                }),
834                Ty::Enum(_) => Some(TypeFact {
835                    contains_ref: false,
836                }),
837                Ty::Ref(_) => Some(TypeFact { contains_ref: true }),
838
839                Ty::Alias(ty) => {
840                    let child_tys = vec![ty.real];
841                    aggregate_facts(
842                        self,
843                        &mut to_compute,
844                        &mut already_visited,
845                        ty_idx,
846                        child_tys,
847                    )?
848                }
849                Ty::Array(ty) => {
850                    let child_tys = vec![ty.elem_ty];
851                    aggregate_facts(
852                        self,
853                        &mut to_compute,
854                        &mut already_visited,
855                        ty_idx,
856                        child_tys,
857                    )?
858                }
859                Ty::Struct(ty) => {
860                    let child_tys = ty.fields.iter().map(|f| f.ty).collect();
861                    aggregate_facts(
862                        self,
863                        &mut to_compute,
864                        &mut already_visited,
865                        ty_idx,
866                        child_tys,
867                    )?
868                }
869                Ty::Union(ty) => {
870                    let child_tys = ty.fields.iter().map(|f| f.ty).collect();
871                    aggregate_facts(
872                        self,
873                        &mut to_compute,
874                        &mut already_visited,
875                        ty_idx,
876                        child_tys,
877                    )?
878                }
879                Ty::Tagged(ty) => {
880                    let child_tys = ty
881                        .variants
882                        .iter()
883                        .flat_map(|v| v.fields.as_deref().unwrap_or_default().iter().map(|f| f.ty))
884                        .collect();
885                    aggregate_facts(
886                        self,
887                        &mut to_compute,
888                        &mut already_visited,
889                        ty_idx,
890                        child_tys,
891                    )?
892                }
893                Ty::Pun(ty) => {
894                    let child_tys = ty.blocks.iter().map(|f| f.real).collect();
895                    aggregate_facts(
896                        self,
897                        &mut to_compute,
898                        &mut already_visited,
899                        ty_idx,
900                        child_tys,
901                    )?
902                }
903            };
904
905            if let Some(facts) = facts {
906                self.ty_facts.insert(ty_idx, facts);
907            }
908        }
909        Ok(())
910    }
911
912    /*
913    pub fn pointee_ty(&self, ty: TyIdx) -> TyIdx {
914        if let Ty::TypedPtr(pointee) = self.realize_ty(ty) {
915            *pointee
916        } else {
917            unreachable!("expected typed to be pointer");
918        }
919    }
920     */
921    pub fn span_for_ty_decl(&self, ty: TyIdx) -> SourceSpan {
922        match self.realize_ty(ty) {
923            Ty::Primitive(_) => SourceSpan::from(1..1),
924            Ty::Empty => SourceSpan::from(1..1),
925            Ty::Struct(ty) => Spanned::span(&ty.name),
926            Ty::Union(ty) => Spanned::span(&ty.name),
927            Ty::Enum(ty) => Spanned::span(&ty.name),
928            Ty::Tagged(ty) => Spanned::span(&ty.name),
929            Ty::Alias(ty) => Spanned::span(&ty.name),
930            Ty::Pun(ty) => Spanned::span(&ty.name),
931            Ty::Array(ty) => self.span_for_ty_decl(ty.elem_ty),
932            Ty::Ref(ty) => self.span_for_ty_decl(ty.pointee_ty),
933        }
934    }
935    /// Stringify a type.
936    pub fn format_ty(&self, ty: TyIdx) -> String {
937        match self.realize_ty(ty) {
938            Ty::Primitive(prim) => format!("{:?}", prim).to_lowercase(),
939            Ty::Empty => "()".to_string(),
940            Ty::Struct(decl) => format!("{}", decl.name),
941            Ty::Enum(decl) => format!("{}", decl.name),
942            Ty::Tagged(decl) => format!("{}", decl.name),
943            Ty::Union(decl) => format!("{}", decl.name),
944            Ty::Alias(decl) => format!("{}", decl.name),
945            Ty::Pun(decl) => format!("{}", decl.name),
946            Ty::Array(array_ty) => {
947                let inner = self.format_ty(array_ty.elem_ty);
948                format!("[{}; {}]", inner, array_ty.len)
949            }
950            Ty::Ref(ref_ty) => {
951                let inner = self.format_ty(ref_ty.pointee_ty);
952                format!("&{}", inner)
953            }
954        }
955    }
956}
957
958/// A node in the DefinitionGraph
959#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
960enum DefinitionGraphNode {
961    Func(FuncIdx),
962    Ty(TyIdx),
963}
964
965/// A Dependency Graph of all the type/function definitions.
966#[derive(Debug, Clone)]
967pub struct DefinitionGraph {
968    /// The actual Graph
969    graph: DiGraph<DefinitionGraphNode, ()>,
970    /// FuncIdx = NodeIdx
971    func_nodes: Vec<NodeIndex>,
972    /// The Strongly Connected Components in topological order
973    def_order: Vec<Vec<NodeIndex>>,
974}
975
976impl TypedProgram {
977    /// Get the Ty for this TyIdx
978    pub fn realize_ty(&self, ty: TyIdx) -> &Ty {
979        self.tcx.realize_ty(ty)
980    }
981
982    /// Get the Func for this FuncIdx
983    pub fn realize_func(&self, func: FuncIdx) -> &Func {
984        &self.funcs[func]
985    }
986
987    pub fn ty_contains_ref(&self, ty: TyIdx) -> bool {
988        self.tcx.ty_facts[&ty].contains_ref
989    }
990
991    pub fn all_funcs(&self) -> impl Iterator<Item = FuncIdx> {
992        0..self.builtin_funcs_start
993    }
994
995    /// Resolve a [`PunTy`][] based on the current [`PunEnv`][].
996    pub fn resolve_pun(&self, pun: &PunTy, env: &PunEnv) -> Result<TyIdx> {
997        self.tcx.resolve_pun(pun, env)
998    }
999
1000    /// Stringify a type (for debugging).
1001    pub fn format_ty(&self, ty: TyIdx) -> String {
1002        self.tcx.format_ty(ty)
1003    }
1004
1005    /// Compute the dependency graph between types ([`DefinitionGraph`][]).
1006    ///
1007    /// This serves two purposes:
1008    ///
1009    /// * Figuring out the correct order of type/function declarations (and forward declarations)
1010    ///   for languages like C that need that kind of thing (and its prettier for other langs).
1011    ///
1012    /// * Producing minimized examples for subsets of the program (by only emitting the types
1013    ///   needed for a single function).
1014    ///
1015    /// The next step in lowering the program is to query [`DefinitionGraph::definitions`][] with the
1016    /// functions you want to emit!
1017    ///
1018    /// This can fail if the given [`PunEnv`][] fails to resolve a [`PunTy`][].
1019    pub fn definition_graph(&self, env: &PunEnv) -> Result<DefinitionGraph> {
1020        let mut graph = petgraph::graph::DiGraph::new();
1021        let mut nodes = vec![];
1022
1023        // First create all the nodes for all the types
1024        for (ty_idx, _ty) in self.tcx.tys.iter().enumerate() {
1025            let ty_node = graph.add_node(DefinitionGraphNode::Ty(ty_idx));
1026            nodes.push(ty_node);
1027        }
1028
1029        // Now create edges between them for deps
1030        //
1031        // NOTE: it's fine to make an edge from a node to itself, that doesn't
1032        // change anything we do further down with SCCs!
1033        for (ty_idx, ty) in self.tcx.tys.iter().enumerate() {
1034            let ty_node = nodes[ty_idx];
1035            match ty {
1036                Ty::Struct(ty) => {
1037                    for field in &ty.fields {
1038                        let field_ty_node = nodes[field.ty];
1039                        graph.update_edge(ty_node, field_ty_node, ());
1040                    }
1041                }
1042                Ty::Union(ty) => {
1043                    for field in &ty.fields {
1044                        let field_ty_node = nodes[field.ty];
1045                        graph.update_edge(ty_node, field_ty_node, ());
1046                    }
1047                }
1048                Ty::Tagged(ty) => {
1049                    for variant in &ty.variants {
1050                        if let Some(fields) = &variant.fields {
1051                            for field in fields {
1052                                let field_ty_node = nodes[field.ty];
1053                                graph.update_edge(ty_node, field_ty_node, ());
1054                            }
1055                        }
1056                    }
1057                }
1058                Ty::Alias(ty) => {
1059                    let real_ty_node = nodes[ty.real];
1060                    graph.update_edge(ty_node, real_ty_node, ());
1061                }
1062                Ty::Pun(ty) => {
1063                    let real_ty_node = nodes[self.tcx.resolve_pun(ty, env)?];
1064                    graph.update_edge(ty_node, real_ty_node, ());
1065                }
1066                Ty::Array(ty) => {
1067                    let elem_ty_node = nodes[ty.elem_ty];
1068                    graph.update_edge(ty_node, elem_ty_node, ());
1069                }
1070                Ty::Ref(ty) => {
1071                    let pointee_ty_node = nodes[ty.pointee_ty];
1072                    graph.update_edge(ty_node, pointee_ty_node, ());
1073                }
1074                Ty::Enum(_) => {
1075                    // Arguably this can't depend on any types...
1076                    // BUT we should consider whether `@tag i32` is a dependency on i32!
1077                    // These kinds of annotations aren't configured yet though!
1078                }
1079                Ty::Primitive(_) | Ty::Empty => {
1080                    // These types have no deps, no edges to add!
1081                }
1082            }
1083        }
1084
1085        // Add edges from functions to the things they reference
1086        let mut func_nodes = vec![];
1087        for (func_idx, func) in self.funcs.iter().enumerate() {
1088            let func_node = graph.add_node(DefinitionGraphNode::Func(func_idx));
1089            for arg in func.inputs.iter().chain(func.outputs.iter()) {
1090                let arg_ty_node = nodes[arg.ty];
1091                graph.update_edge(func_node, arg_ty_node, ());
1092            }
1093            func_nodes.push(func_node);
1094        }
1095
1096        // Now compute the Strongly Connected Components!
1097        // See the comment in `DefinitionGraph::definitions` for details on what this is!
1098        let mut def_order = petgraph::algo::kosaraju_scc(&graph);
1099
1100        // Further refine the SCC order!
1101        //
1102        // We need all the nominal types to come first.
1103        // This is because the *name* `&MyType` still contains `MyType`,
1104        // so starting the forward declares with it would still refer to an
1105        // undeclared type. Thankfully no such issue exists for starting with
1106        // `MyType`, and all cycles always involve a nominal type!
1107        for component in &mut def_order {
1108            let mut sorted_nodes = std::mem::take(component);
1109            sorted_nodes.sort_by(|&lhs, &rhs| match (&graph[lhs], &graph[rhs]) {
1110                (DefinitionGraphNode::Func(l), DefinitionGraphNode::Func(r)) => l.cmp(r),
1111                (DefinitionGraphNode::Func(_), DefinitionGraphNode::Ty(_)) => Ordering::Less,
1112                (DefinitionGraphNode::Ty(_), DefinitionGraphNode::Func(_)) => Ordering::Greater,
1113                (DefinitionGraphNode::Ty(l), DefinitionGraphNode::Ty(r)) => {
1114                    let lty = self.realize_ty(*l);
1115                    let rty = self.realize_ty(*r);
1116                    rty.is_nominal().cmp(&lty.is_nominal()).then(l.cmp(r))
1117                }
1118            });
1119            *component = sorted_nodes;
1120        }
1121
1122        Ok(DefinitionGraph {
1123            graph,
1124            func_nodes,
1125            def_order,
1126        })
1127    }
1128}
1129
1130/// Kinds of definitions/declarations a backend should emit
1131pub enum Definition {
1132    /// Forward-declare this type
1133    DeclareTy(TyIdx),
1134    /// Define this type fully
1135    DefineTy(TyIdx),
1136    /// Forward-declare the function (only ever necessary with `feature="eval"`)
1137    /// otherwise functions are always roots and never need forward-declares.
1138    DeclareFunc(FuncIdx),
1139    /// Define this function fully
1140    DefineFunc(FuncIdx),
1141}
1142
1143impl DefinitionGraph {
1144    /// Get the exact list of forward-declares and definitions to emit the program!
1145    ///
1146    /// Note that the recommendations are *extremely* agnostic to the target language
1147    /// and will generally recommend you forward-declare or define a lot of types
1148    /// that need no such thing in basically every language.
1149    ///
1150    /// For instance with a definition like:
1151    ///
1152    /// ```kdl
1153    /// struct "SelfReferential" {
1154    ///     me "Option<&SelfReferential>"
1155    ///     val "u32"
1156    /// }
1157    /// ```
1158    ///
1159    /// (Generics aren't currently supported, this is just easier to express.)
1160    ///
1161    /// You will get recommended something like:
1162    ///
1163    /// 1. Define `u32`
1164    /// 2. Forward-declare `SelfReferential`
1165    /// 3. Forward-declare `&SelfReferential`
1166    /// 4. Define `Option<&SelfReferential>`
1167    /// 5. Define `SelfReferential`
1168    /// 6. Define `&SelfReferential`
1169    ///
1170    /// Which contains a lot of things that are nonsensical in basically every language!
1171    /// That's ok! Just ignore the nonsensical recommendations like "declare a primitive"
1172    /// or "forward-declare a reference" if they aren't necessary in your language!
1173    ///
1174    /// A Rust backend would only need to emit 5 (and maybe 4 if it's not Real Option).
1175    ///
1176    /// A C backend would only need to emit 2 and 5 (and maybe 4 if it's not Real Option).
1177    pub fn definitions(&self, funcs: impl IntoIterator<Item = FuncIdx>) -> Vec<Definition> {
1178        // Take the requested functions and compute all their dependencies!
1179        let mut reachable = std::collections::HashSet::new();
1180        petgraph::visit::depth_first_search(
1181            &self.graph,
1182            funcs.into_iter().map(|f| self.func_nodes[f]),
1183            |event| {
1184                if let petgraph::visit::DfsEvent::Discover(node, _) = event {
1185                    reachable.insert(node);
1186                }
1187            },
1188        );
1189
1190        // Languages like C and C++ require types to be defined before they're used,
1191        // so we need to build a dependency graph between types and functions and
1192        // compute the topological sort. Unfortunately, types don't necessarily
1193        // form a DAG, so how do we do this!?
1194        //
1195        // An "SCC" algorithm gives us a topo-sort of our graph as a DAG,
1196        // but if there are any cycles then they get grouped together into one Mega Node
1197        // called a "Strongly Connected Component" (defined as "every node in an SCC can
1198        // reach every other node in the SCC"). This is why our toposort has elements that
1199        // are Vec<Node> instead of just Node. The order of elements within an SCC
1200        // is arbitrary because they're basically a dependency soup.
1201        //
1202        // If the graph is a proper DAG then every inner Vec will have one element. Otherwise
1203        // cycles will be "hidden" by cramming it into a Vec. In the limit everything will
1204        // get crammed into one inner Vec and that basically tells you everything is super
1205        // fucked.
1206        //
1207        // To unfuck an SCC, languages like C and C++ have "forward declarations" that let
1208        // you reserve a type name before actually specifying its definition. This breaks
1209        // the dependency cycle. Determining the optimal forward declarations is NP-Hard,
1210        // so we opt for the conservative solution of "emit a forward decl for everything
1211        // in the SCC except for 1 node", which is necessary and sufficient if the SCC
1212        // is the complete graph on N nodes.
1213        let mut output = vec![];
1214        for component in &self.def_order {
1215            // Get all the nodes in this SCC, and filter out the ones not reachable from
1216            // the functions we want to emit. (Filtering lets us emit minimal examples for
1217            // test failures so it's easy to reproduce/report!)
1218            let nodes = component.iter().rev().filter(|n| reachable.contains(*n));
1219
1220            // Emit forward decls for everything but the first (last) node
1221            // Note that this cutely does The Right thing (no forward decl)
1222            // for the "happy" case of an SCC of one node (proper DAG).
1223            for &node_idx in nodes.clone().skip(1) {
1224                let node = &self.graph[node_idx];
1225                match *node {
1226                    DefinitionGraphNode::Func(func_idx) => {
1227                        output.push(Definition::DeclareFunc(func_idx));
1228                    }
1229                    DefinitionGraphNode::Ty(ty_idx) => {
1230                        output.push(Definition::DeclareTy(ty_idx));
1231                    }
1232                }
1233            }
1234
1235            // Now that cycles have been broken with forward declares, we can
1236            // just emit everything in the SCC. Note that we emit the type we
1237            // *didn't* forward-declare first. All of its dependencies have
1238            // been resolved by the forward-declares, but it still needs to be
1239            // defined before anyone else in case they refer to it!
1240            for &node_idx in nodes {
1241                let node = &self.graph[node_idx];
1242                match *node {
1243                    DefinitionGraphNode::Func(func_idx) => {
1244                        output.push(Definition::DefineFunc(func_idx));
1245                    }
1246                    DefinitionGraphNode::Ty(ty_idx) => {
1247                        output.push(Definition::DefineTy(ty_idx));
1248                    }
1249                }
1250            }
1251        }
1252
1253        output
1254    }
1255}
1256
1257fn ident_var<T>(val: Option<Ident>, basename: &str, idx: usize, backup_span: &Spanned<T>) -> Ident {
1258    if let Some(val) = val {
1259        val
1260    } else {
1261        let val = format!("{basename}{idx}");
1262        let val = Spanned::new(val, Spanned::span(backup_span));
1263        Ident {
1264            was_blank: false,
1265            val,
1266        }
1267    }
1268}
kdl_script/types.rs

kdl_script/
types.rs