espy_tail/
lib.rs

1//! Compiles an espy abstract syntax tree into bytecode.
2//!
3//! ```rust
4//! use espy_eyes::Lexer;
5//! use espy_ears::Block;
6//! use espy_tail::compile;
7//!
8//! let source = "1 + 2";
9//! let mut lexer = Lexer::from(source).peekable();
10//! let block = Block::new(&mut lexer);
11//! let program = compile(block, source).unwrap();
12//! ```
13
14use espy_ears::Diagnostics;
15use espy_ears::Expression;
16use espy_ears::FunctionBody;
17use espy_ears::Let;
18use espy_ears::Match;
19use espy_ears::Node;
20use espy_ears::Sequence;
21use espy_ears::Set;
22use espy_ears::Statement;
23use espy_ears::Use;
24use espy_ears::{Binding, BindingMethod};
25use espy_ears::{Block, BlockResult};
26use espy_ears::{Rebind, RebindBy};
27use espy_eyes::{Lexigram, Token};
28use espy_heart::prelude::*;
29use std::borrow::Cow;
30use std::iter;
31use std::mem;
32use std::num::NonZero;
33use std::num::{ParseIntError, TryFromIntError};
34
35#[cfg(test)]
36mod tests;
37
38#[must_use]
39#[derive(Clone, Debug, Default)]
40pub struct CompiledProgram {
41    pub executable: Vec<u8>,
42    pub debug_info: Vec<u8>,
43}
44
45/// Compiles a block directly into interpreter-ready bytecode.
46///
47/// # Errors
48///
49/// Returns an error if the block contained error diagnostics or failed to compile.
50pub fn compile<'source>(
51    ast: Box<Block<'source>>,
52    source: &'source str,
53) -> Result<CompiledProgram, Error<'source>> {
54    Program::prepare(ast, source).and_then(Program::compile)
55}
56
57#[derive(Debug)]
58pub enum Error<'source> {
59    /// Emitted when the program is too large (produced bytecode larger than 4GiB)
60    ProgramLimitExceeded,
61    /// Attempted to break out of a scope, but no parent scope accepted unlabeled breaks.
62    InvalidBreak(Token<'source>),
63    /// The AST contained an integer that did not fit into the expected type.
64    InvalidInteger(Token<'source>, ParseIntError),
65    /// The AST contained a string with an invalid escape sequence.
66    InvalidString(Token<'source>, espy_eyes::EscapeError),
67    /// The AST contained an identifier with an invalid escape sequence.
68    InvalidIdentifier(Token<'source>, espy_eyes::EscapeError),
69    /// A variable was referenced that did not exist.
70    UndefinedSymbol(Token<'source>),
71    /// The AST contained an error.
72    ///
73    /// Note that this only contains the first error that the parser encounted.
74    /// Inspecting the AST directly allows you to see all of the errors the parser encountered
75    /// with some additional context.
76    InvalidAst(espy_ears::Error<'source>),
77}
78
79impl<'source> std::fmt::Display for Error<'source> {
80    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
81        match self {
82            Error::ProgramLimitExceeded => write!(
83                f,
84                "program limit exceeded (bytecode must be less than 4GiB)"
85            ),
86            Error::InvalidBreak(_) => write!(
87                f,
88                "attempted to break out of a scope, but no parent scope accepted unlabeled breaks"
89            ),
90            Error::InvalidInteger(_, e) => write!(f, "invalid integer literal: {e}"),
91            Error::InvalidString(_, e) => write!(f, "invalid string literal: {e:?}"),
92            Error::InvalidIdentifier(_, e) => write!(f, "invalid raw identifier: {e:?}"),
93            Error::UndefinedSymbol(_) => write!(f, "undefined symbol"),
94            Error::InvalidAst(e) => write!(f, "failed to parse program: {e:?}"),
95        }
96    }
97}
98
99impl<'source> std::error::Error for Error<'source> {}
100
101impl<'source> Error<'source> {
102    /// Provides the span of source code causing this compiler error,
103    /// if one is available.
104    ///
105    /// Note that this method will return `None` in the case of an invalid ast.
106    /// Inspect ast diagnostics directly if you want to format them.
107    ///
108    /// # Panics
109    ///
110    /// Panics if provided a aosurce string slice that this error did not
111    /// originate from.
112    #[must_use]
113    pub fn locate(&self, source: &'source str) -> Option<(usize, usize)> {
114        let origin = match self {
115            Error::ProgramLimitExceeded => return None,
116            Error::InvalidBreak(token) => token.origin,
117            Error::InvalidInteger(token, _) => token.origin,
118            Error::InvalidString(token, _) => token.origin,
119            Error::InvalidIdentifier(token, _) => token.origin,
120            Error::UndefinedSymbol(token) => token.origin,
121            Error::InvalidAst(_) => return None,
122        };
123        let start = usize::try_from(origin.as_ptr() as isize - source.as_ptr() as isize)
124            .expect("origin must start after source string");
125        let end = start + origin.len();
126        assert!(
127            origin.len() <= source.len(),
128            "origin must be a subset of source string"
129        );
130        Some((start, end))
131    }
132}
133
134impl From<TryFromIntError> for Error<'_> {
135    fn from(_: TryFromIntError) -> Self {
136        Self::ProgramLimitExceeded
137    }
138}
139
140fn try_validate(diagnostics: Diagnostics) -> Result<(), Error> {
141    if let Some(error) = diagnostics.errors.into_iter().next() {
142        Err(Error::InvalidAst(error))
143    } else {
144        Ok(())
145    }
146}
147
148/// For retroactively filling in jump instructions.
149fn resolve_jump(block: &mut ProgramBlock, at: usize) {
150    #[allow(
151        clippy::cast_possible_truncation,
152        reason = "this will produce incorrect code, but compile will return an error later anyways so it's fine"
153    )]
154    let pc = block.len() as ProgramCounter;
155    block.instructions[at..(at + size_of_val(&pc))].copy_from_slice(&pc.to_le_bytes());
156}
157
158fn token_range(token: Token, source: &str) -> Span {
159    let start = usize::try_from(token.origin.as_ptr() as isize - source.as_ptr() as isize)
160        .expect("origin must start after source string");
161    let end = start + token.origin.len();
162    assert!(
163        token.origin.len() <= source.len(),
164        "origin must be a subset of source string"
165    );
166    Span {
167        start,
168        end: end
169            .try_into()
170            .expect("origin should be at least one character"),
171    }
172}
173
174// These are practically used as functions which return iterators over bytes at this point.
175// There isn't a good reason for this other than that they used to be stored for a little while,
176// so if true functions make more sense for any reason this type can be removed.
177#[derive(Clone, Debug, PartialEq, Eq)]
178enum Instruction {
179    /// Copy a value from the given position and put it on the top of the stack.
180    Clone(StackPointer),
181    /// Pop a value off the stack.
182    Pop,
183    /// Pop a value off the stack and write it to the given position.
184    /// Then, pop each value above this position and discard it.
185    Collapse(StackPointer),
186    /// Set the program counter.
187    Jump(ProgramCounter),
188    /// Pop a boolean value off the stack.
189    /// If it is *false*, set the program counter.
190    If(ProgramCounter),
191
192    PushUnit,
193    PushI64(i64),
194    /// Pop the top value off the stack and use it as the return type of the function.
195    ///
196    /// Pop the next value off the stack and use it as the input type of the function.
197    ///
198    /// Pop the following `captures` values off the stack and onto a new stack,
199    /// and then push a function containing the new stack and the proceeding block id to the current stack.
200    /// If the block id is zero (referring to the program entrypoint),
201    /// the function should panic when called (have a body of never).
202    PushFunction {
203        captures: StackPointer,
204        function: BlockId,
205    },
206    PushTrue,
207    PushFalse,
208    /// Pop the top value off the stack;
209    /// it should be unit or a named tuple of types.
210    /// Use this value as the enum's variants.
211    ///
212    /// Push the resulting enum type to the stack.
213    PushEnum,
214    PushString(StringId),
215
216    Add,
217    Sub,
218    Mul,
219    Div,
220    /// Pop the top value off the stack and push it to the function's stack.
221    /// The next value is the function to be called.
222    /// After pushing the value, jump to the function's block id.
223    /// It will return a single value which is placed to the stack in its place.
224    Call,
225    /// Pop the first value off the stack, then pop the second and index it using the first.
226    /// Push the result.
227    Index,
228    /// Pop two values off the stack and combine them into a tuple.
229    /// If one of them is already a tuple, concatenate them (always producing a flat tuple).
230    /// The topmost value should be appended to the value underneath it.
231    Tuple,
232    /// Pop a value off the stack and turn it into a named tuple with a single value,
233    /// and a name according to the following string id.
234    Name(StringId),
235    /// Pop a value off the stack and turn it into a numeric tuple with a single value.
236    Nest,
237    Negative,
238    Pipe,
239    BitwiseAnd,
240    BitwiseOr,
241    BitwiseXor,
242    EqualTo,
243    NotEqualTo,
244    Greater,
245    GreaterEqual,
246    Lesser,
247    LesserEqual,
248    LogicalAnd,
249    LogicalOr,
250    Deref,
251    Set,
252    Matches,
253    /// Pop a value off the stack; this is the bind function.
254    /// Pop the next value off the stack; this is the first argument to the bind function.
255    /// Turn the current state of the interpreter into a function and pass it as the second argument to the bind function.
256    /// Finally, execute the bind function with these arguments.
257    Bind,
258    /// Pop a value off the stack.
259    /// Then turn the current state of the interpreter into a function.
260    /// Return a tuple of these values from the current function.
261    Yield,
262}
263
264struct InstructionIter {
265    instruction: Instruction,
266    index: usize,
267}
268
269impl Iterator for InstructionIter {
270    type Item = u8;
271
272    fn next(&mut self) -> Option<Self::Item> {
273        macro_rules! decompose {
274            ($instruction:expr, $($arg:ident as $offset:literal..=$end:literal),*) => {
275                match self.index {
276                    0 => $instruction,
277                    $(
278                        $offset..=$end => $arg.to_le_bytes()[self.index - $offset],
279                    )*
280                    _ => return None,
281                }
282            };
283        }
284        let byte = match self.instruction {
285            Instruction::Clone(from) => decompose!(instruction::CLONE, from as 1..=4),
286            Instruction::Pop => decompose!(instruction::POP,),
287            Instruction::Collapse(to) => decompose!(instruction::COLLAPSE, to as 1..=4),
288            Instruction::Jump(pc) => decompose!(instruction::JUMP, pc as 1..=4),
289            Instruction::If(pc) => decompose!(instruction::IF, pc as 1..=4),
290
291            Instruction::PushUnit => decompose!(instruction::PUSH_UNIT,),
292            Instruction::PushI64(literal) => decompose!(instruction::PUSH_I64, literal as 1..=8),
293            Instruction::PushFunction { captures, function } => {
294                decompose!(instruction::PUSH_FUNCTION, captures as 1..=4, function as 5..=8)
295            }
296            Instruction::PushTrue => decompose!(instruction::PUSH_TRUE,),
297            Instruction::PushFalse => decompose!(instruction::PUSH_FALSE,),
298            Instruction::PushEnum => decompose!(instruction::PUSH_ENUM,),
299            Instruction::PushString(s) => decompose!(instruction::PUSH_STRING, s as 1..=4),
300
301            Instruction::Add => decompose!(instruction::ADD,),
302            Instruction::Sub => decompose!(instruction::SUB,),
303            Instruction::Mul => decompose!(instruction::MUL,),
304            Instruction::Div => decompose!(instruction::DIV,),
305            Instruction::Call => decompose!(instruction::CALL,),
306            Instruction::Index => decompose!(instruction::INDEX,),
307            Instruction::Tuple => decompose!(instruction::TUPLE,),
308            Instruction::Name(name) => decompose!(instruction::NAME, name as 1..=4),
309            Instruction::Nest => decompose!(instruction::NEST,),
310            Instruction::Negative => decompose!(instruction::NEGATIVE,),
311            Instruction::Pipe => decompose!(instruction::PIPE,),
312            Instruction::BitwiseAnd => decompose!(instruction::BITWISE_AND,),
313            Instruction::BitwiseOr => decompose!(instruction::BITWISE_OR,),
314            Instruction::BitwiseXor => decompose!(instruction::BITWISE_XOR,),
315            Instruction::EqualTo => decompose!(instruction::EQUAL_TO,),
316            Instruction::NotEqualTo => decompose!(instruction::NOT_EQUAL_TO,),
317            Instruction::Greater => decompose!(instruction::GREATER,),
318            Instruction::GreaterEqual => decompose!(instruction::GREATER_EQUAL,),
319            Instruction::Lesser => decompose!(instruction::LESSER,),
320            Instruction::LesserEqual => decompose!(instruction::LESSER_EQUAL,),
321            Instruction::LogicalAnd => decompose!(instruction::LOGICAL_AND,),
322            Instruction::LogicalOr => decompose!(instruction::LOGICAL_OR,),
323            Instruction::Deref => decompose!(instruction::DEREF,),
324            Instruction::Set => decompose!(instruction::SET,),
325            Instruction::Matches => decompose!(instruction::MATCHES,),
326            Instruction::Bind => decompose!(instruction::BIND,),
327            Instruction::Yield => decompose!(instruction::YIELD,),
328        };
329        self.index += 1;
330        Some(byte)
331    }
332}
333
334impl IntoIterator for Instruction {
335    type Item = u8;
336    type IntoIter = InstructionIter;
337    fn into_iter(self) -> Self::IntoIter {
338        InstructionIter {
339            instruction: self,
340            index: 0,
341        }
342    }
343}
344
345#[derive(Clone, Copy, Debug)]
346struct Span {
347    start: usize,
348    // a token must have a length of at least one character,
349    // so we use this fact to provide Option<Span> a niche.
350    end: NonZero<usize>,
351}
352
353impl Span {
354    fn merge(&self, other: &Self) -> Self {
355        Self {
356            start: self.start,
357            end: other.end,
358        }
359    }
360}
361
362#[derive(Clone, Debug, Default)]
363struct ProgramBlock {
364    instructions: Vec<u8>,
365    debug_info: Vec<u8>,
366}
367
368struct ProgramPushBuilder<'block> {
369    span: Option<Span>,
370    block: &'block mut ProgramBlock,
371}
372
373impl ProgramBlock {
374    fn len(&self) -> usize {
375        self.instructions.len()
376    }
377
378    fn at(&mut self, span: Span) -> ProgramPushBuilder<'_> {
379        ProgramPushBuilder {
380            span: Some(span),
381            block: self,
382        }
383    }
384
385    fn push(&mut self, instruction: Instruction, span: Option<Span>) -> &mut Self {
386        let prev_len = self.instructions.len();
387        self.instructions.extend(instruction);
388        // The bytes of this instruction which need to be accounted for with ditto marks.
389        let remaining_bytes = self.instructions.len() - prev_len - span.is_some() as usize;
390        if let Some(span) = span {
391            let start = (span.start as u64).to_le_bytes();
392            let start =
393                &start[0..(size_of::<u64>() - start.into_iter().rev().filter(|x| *x == 0).count())];
394            let end = (usize::from(span.end) as u64).to_le_bytes();
395            let end =
396                &end[0..(size_of::<u64>() - end.into_iter().rev().filter(|x| *x == 0).count())];
397            self.debug_info.extend(
398                iter::once(debug::SPAN)
399                    .chain(iter::once(start.len() as u8))
400                    .chain(start.iter().copied())
401                    .chain(iter::once(end.len() as u8))
402                    .chain(end.iter().copied()),
403            );
404        }
405        if remaining_bytes > 0 {
406            self.debug_info
407                .extend([debug::DITTO, remaining_bytes as u8]);
408        }
409        self
410    }
411
412    // This should only be used for generated instructions where no position is available.
413    // Document why it is appropriate!
414    fn push_positionless(&mut self, instruction: Instruction) -> &mut Self {
415        self.push(instruction, None)
416    }
417}
418
419impl<'block> ProgramPushBuilder<'block> {
420    fn push(mut self, instruction: Instruction) -> Self {
421        self.block.push(instruction, self.span);
422        self.span = None;
423        self
424    }
425}
426
427#[derive(Clone, Debug, Default)]
428struct Program<'source> {
429    source: &'source str,
430    blocks: Vec<ProgramBlock>,
431    strings: Vec<Cow<'source, str>>,
432}
433
434impl<'source> Program<'source> {
435    fn prepare(ast: Box<Block<'source>>, source: &'source str) -> Result<Self, Error<'source>> {
436        let mut this = Self {
437            source,
438            ..Default::default()
439        };
440        let block_id = this.create_block()?;
441        this.add_block(block_id, ast, Scope::default())?;
442        Ok(this)
443    }
444
445    /// # Errors
446    ///
447    /// Returns an error if the program results in bytecode over 4GiB.
448    fn compile(self) -> Result<CompiledProgram, Error<'source>> {
449        let mut output = CompiledProgram {
450            executable: Vec::new(),
451            debug_info: Vec::new(),
452        };
453
454        output
455            .executable
456            .extend(u32::try_from(self.blocks.len())?.to_le_bytes());
457        output
458            .executable
459            .extend(u32::try_from(self.strings.len())?.to_le_bytes());
460        // Reserve space for vector offsets.
461        // Blocks, strings, and string sets are only referred to by index,
462        // so this is the only program-wide retroactive filling required.
463        let block_offsets = output.executable.len();
464        output
465            .executable
466            .extend(iter::repeat_n(0, self.blocks.len() * size_of::<u32>()));
467        // same for strings.
468        let string_offsets = output.executable.len();
469        output
470            .executable
471            .extend(iter::repeat_n(0, self.strings.len() * size_of::<u32>()));
472
473        // Fill in offsets.
474        for (block_id, block) in self.blocks.into_iter().enumerate() {
475            let src = BlockId::try_from(output.executable.len())?;
476            let dest = block_offsets + block_id * size_of_val(&src);
477            output.executable[dest..(dest + size_of_val(&src))].copy_from_slice(&src.to_le_bytes());
478            output.executable.extend(block.instructions);
479            output.debug_info.extend(block.debug_info);
480        }
481        for (string_id, string) in self.strings.into_iter().enumerate() {
482            let src = StringId::try_from(output.executable.len())?;
483            let dest = string_offsets + string_id * size_of_val(&src);
484            output.executable[dest..(dest + size_of_val(&src))].copy_from_slice(&src.to_le_bytes());
485            output.executable.extend(string.bytes());
486        }
487        Ok(output)
488    }
489
490    fn create_block(&mut self) -> Result<BlockId, Error<'source>> {
491        self.blocks.push(ProgramBlock::default());
492        (self.blocks.len() - 1)
493            .try_into()
494            .map_err(|_| Error::ProgramLimitExceeded)
495    }
496
497    fn create_string(
498        &mut self,
499        s: impl Into<Cow<'source, str>>,
500    ) -> Result<StringId, Error<'source>> {
501        let s = s.into();
502        if let Some((i, _)) = self.strings.iter().enumerate().find(|(_, x)| **x == s) {
503            i
504        } else {
505            self.strings.push(s);
506            self.strings.len() - 1
507        }
508        .try_into()
509        .map_err(|_| Error::ProgramLimitExceeded)
510    }
511
512    fn add_block(
513        &mut self,
514        block_id: BlockId,
515        block: Box<Block<'source>>,
516        mut scope: Scope<'_, 'source>,
517    ) -> Result<(), Error<'source>> {
518        self.insert_block(block_id, block, &mut scope)?;
519        Ok(())
520    }
521
522    fn insert_block(
523        &mut self,
524        block_id: BlockId,
525        block: Box<Block<'source>>,
526        scope: &mut Scope<'_, 'source>,
527    ) -> Result<(), Error<'source>> {
528        let (result, diagnostics, statements) = Block::destructure(block);
529        try_validate(diagnostics)?;
530        for statement in statements {
531            self.add_statement(block_id, statement, scope)?;
532        }
533        // Collapse the scope if any additional values are left the stack.
534        // This occurs when statements bind variables.
535        // We have to check this here because BlockResult::Function is about to move the scope,
536        // but the instruction is not emitted until after the result is pushed to the stack.
537        // This is fine because block results must push one and only one value.
538        let collapse_point = scope
539            .parent
540            .filter(|parent| parent.stack_pointer < scope.stack_pointer)
541            .map(|parent| parent.stack_pointer);
542        match result {
543            BlockResult::Expression(i) => {
544                self.add_expression(block_id, i, scope)?;
545            }
546            BlockResult::Function(function) => {
547                try_validate(function.diagnostics)?;
548                if let Some(input) = function.input {
549                    self.add_expression(block_id, input, scope)?;
550                } else {
551                    self.blocks[block_id as usize]
552                        .at(token_range(function.with_token, self.source))
553                        .push(Instruction::Clone(builtins::ANY));
554                    scope.stack_pointer += 1;
555                }
556                if let Some(output) = function.output {
557                    self.add_expression(block_id, output, scope)?;
558                } else {
559                    self.blocks[block_id as usize]
560                        .at(token_range(function.with_token, self.source))
561                        .push(Instruction::Clone(builtins::ANY));
562                    scope.stack_pointer += 1;
563                }
564                // Account for input and output being popped by PushFunction.
565                scope.stack_pointer -= 2;
566                let mut scope = scope.promote();
567                let captures = scope.stack_pointer;
568                let function_id = if let FunctionBody::Block(block) = function.body {
569                    let function_id = self.create_block()?;
570                    // to be filled in by the argument (which is about to be bound)
571                    scope.stack_pointer += 1;
572                    if let Some(argument) = function.argument {
573                        self.add_binding(function_id, argument, &mut scope)?;
574                    }
575                    self.add_block(function_id, block, scope)?;
576                    function_id
577                } else {
578                    if let Some(argument) = function.argument {
579                        self.validate_binding(argument)?;
580                    }
581                    0
582                };
583                self.blocks[block_id as usize]
584                    .at(token_range(function.with_token, self.source))
585                    .push(Instruction::PushFunction {
586                        captures,
587                        function: function_id,
588                    });
589            }
590        }
591        if let Some(collapse_point) = collapse_point {
592            // Collapse is entirely generated;
593            // `}` or `end` might fit as the associated token, but top-level blocks have no equivalent.
594            // (they may have no tokens at all)
595            self.blocks[block_id as usize].push_positionless(Instruction::Collapse(collapse_point));
596        }
597        Ok(())
598    }
599
600    /// This is a copy of add_binding with compilation disabled,
601    /// so that the argument to a never function is still validated even though its compilation is skipped.
602    /// I would prefer any other way to do this.
603    ///
604    /// self is entirely unused.
605    fn validate_binding(&self, binding: Binding<'source>) -> Result<(), Error<'source>> {
606        try_validate(binding.diagnostics)?;
607        match binding.method {
608            BindingMethod::Single(_) => {}
609            BindingMethod::Numeric { bindings, .. } => {
610                for binding in bindings {
611                    self.validate_binding(binding.binding)?;
612                }
613            }
614            BindingMethod::Named { bindings, .. } => {
615                for binding in bindings.into_iter().flat_map(|x| x.binding) {
616                    self.validate_binding(binding.binding)?;
617                }
618            }
619        }
620        Ok(())
621    }
622
623    fn add_binding(
624        &mut self,
625        block_id: BlockId,
626        binding: Binding<'source>,
627        scope: &mut Scope<'_, 'source>,
628    ) -> Result<(), Error<'source>> {
629        try_validate(binding.diagnostics)?;
630        let root = scope.stack_pointer - 1;
631        match binding.method {
632            BindingMethod::Single(token) => match token.lexigram {
633                Lexigram::Ident => scope.insert(
634                    token
635                        .resolve()
636                        .map_err(|e| Error::InvalidIdentifier(token, e))?,
637                ),
638                Lexigram::Discard => {}
639                _ => unreachable!("only idents and discards are valid bindings"),
640            },
641            BindingMethod::Numeric {
642                open_paren,
643                bindings,
644                close_paren,
645            } => {
646                for (i, binding) in bindings.into_iter().enumerate() {
647                    let range = token_range(open_paren, self.source).merge(&token_range(
648                        close_paren.expect("close paren should be present in a valid numeric bind"),
649                        self.source,
650                    ));
651                    self.blocks[block_id as usize]
652                        .at(range)
653                        .push(Instruction::Clone(root))
654                        .push(Instruction::PushI64(
655                            i64::try_from(i).expect("stack should never reach isize::MAX"),
656                        ))
657                        .push(Instruction::Index);
658                    scope.stack_pointer += 1;
659                    self.add_binding(block_id, binding.binding, scope)?;
660                }
661            }
662            BindingMethod::Named {
663                open_brace,
664                bindings,
665                close_brace,
666            } => {
667                for binding in bindings {
668                    let range = token_range(open_brace, self.source).merge(&token_range(
669                        close_brace.expect("close brace should be present in a valid numeric bind"),
670                        self.source,
671                    ));
672                    let s = self.create_string(
673                        binding
674                            .field
675                            .resolve()
676                            .map_err(|e| Error::InvalidIdentifier(binding.field, e))?,
677                    )?;
678                    self.blocks[block_id as usize]
679                        .at(range)
680                        .push(Instruction::Clone(root))
681                        .push(Instruction::PushString(s))
682                        .push(Instruction::Index);
683                    scope.stack_pointer += 1;
684                    if let Some(sub_binding) = binding.binding {
685                        self.add_binding(block_id, sub_binding.binding, scope)?;
686                    } else {
687                        scope.insert(
688                            binding
689                                .field
690                                .resolve()
691                                .map_err(|e| Error::InvalidIdentifier(binding.field, e))?,
692                        );
693                    }
694                }
695            }
696        }
697        Ok(())
698    }
699
700    /// # Panic
701    ///
702    /// Panics if provided an invalid statement.
703    /// Statements should be validated beforehand by checking the `diagnostics.errors` field.
704    fn add_statement(
705        &mut self,
706        block_id: BlockId,
707        statement: Statement<'source>,
708        scope: &mut Scope<'_, 'source>,
709    ) -> Result<(), Error<'source>> {
710        match statement {
711            Statement::Sequence(Sequence {
712                expression,
713                semicolon_token,
714            }) => {
715                self.add_expression(block_id, expression, scope)?;
716                self.blocks[block_id as usize]
717                    .at(token_range(semicolon_token, self.source))
718                    .push(Instruction::Pop);
719                scope.stack_pointer -= 1;
720            }
721            Statement::Let(Let {
722                binding,
723                expression,
724                diagnostics,
725                ..
726            }) => {
727                try_validate(diagnostics)?;
728                let binding = binding.expect("missing bindings should be caught by diagnostics");
729                self.add_expression(block_id, expression, scope)?;
730                self.add_binding(block_id, binding, scope)?;
731            }
732            Statement::Rebind(Rebind {
733                by, diagnostics, ..
734            }) => {
735                try_validate(diagnostics)?;
736                match by {
737                    RebindBy::Glob { star_token } => {
738                        for (ident, value) in scope
739                            .parent
740                            .iter()
741                            .flat_map(|parent| parent.bindings.iter())
742                        {
743                            self.blocks[block_id as usize]
744                                .at(token_range(star_token, self.source))
745                                .push(Instruction::Clone(*value));
746                            scope.stack_pointer += 1;
747                            // This is an inlined scope.insert call to allow
748                            // mutating these fields while borrowing `parent`.
749                            scope.bindings.push((
750                                ident.clone(),
751                                scope
752                                    .stack_pointer
753                                    .checked_sub(1)
754                                    .expect("attempted to assign variable while stack was empty"),
755                            ));
756                        }
757                    }
758                    RebindBy::Identifiers { bindings } => {
759                        for binding in bindings {
760                            let token = binding
761                                .ident_token
762                                .expect("bindings should be some in a valid rebind");
763                            let ident = token
764                                .resolve()
765                                .map_err(|e| Error::InvalidIdentifier(token, e))?;
766                            let value = scope.get(&ident).ok_or(Error::UndefinedSymbol(token))?;
767                            self.blocks[block_id as usize]
768                                .at(token_range(token, self.source))
769                                .push(Instruction::Clone(value));
770                            scope.stack_pointer += 1;
771                            scope.insert(ident);
772                        }
773                    }
774                }
775            }
776            Statement::Set(Set {
777                target,
778                expression,
779                diagnostics,
780                set_token,
781                ..
782            }) => {
783                try_validate(diagnostics)?;
784                self.add_expression(block_id, target, scope)?;
785                self.add_expression(block_id, expression, scope)?;
786                self.blocks[block_id as usize]
787                    .at(token_range(set_token, self.source))
788                    .push(Instruction::Set);
789                scope.stack_pointer -= 2;
790            }
791            Statement::Use(Use {
792                expression,
793                diagnostics,
794                ..
795            }) => {
796                try_validate(diagnostics)?;
797                self.add_expression(block_id, expression, scope)?;
798                scope.bind_function = Some(
799                    scope
800                        .stack_pointer
801                        .checked_sub(1)
802                        .expect("attempted to assign bind function while stack was empty"),
803                );
804            }
805        }
806        Ok(())
807    }
808
809    fn add_expression(
810        &mut self,
811        block_id: BlockId,
812        expression: impl Into<Option<Box<Expression<'source>>>>,
813        scope: &mut Scope<'_, 'source>,
814    ) -> Result<(), Error<'source>> {
815        // Shortcut for re-indexing self.blocks.
816        // This is necessary because add_block etc take &mut self.
817        macro_rules! block {
818            () => {
819                self.blocks[block_id as usize]
820            };
821        }
822        macro_rules! binop {
823            ($instruction:expr, $token:expr) => {{
824                scope.stack_pointer -= 1;
825                block!()
826                    .at(token_range($token, self.source))
827                    .push($instruction);
828            }};
829        }
830        let Some(expression) = expression.into() else {
831            scope.stack_pointer += 1;
832            // This expression has no tokens
833            // so this instruction can't have a position.
834            block!().push_positionless(Instruction::PushUnit);
835            return Ok(());
836        };
837        let (_first_token, _last_token, diagnostics, nodes) = Expression::destructure(expression);
838        try_validate(diagnostics)?;
839        for node in nodes {
840            match node {
841                Node::Unit(open_paren, close_paren) => {
842                    scope.stack_pointer += 1;
843                    block!()
844                        .at(token_range(open_paren, self.source)
845                            .merge(&token_range(close_paren, self.source)))
846                        .push(Instruction::PushUnit);
847                }
848                Node::Number(token) => {
849                    let integer = token
850                        .origin
851                        .parse()
852                        .map_err(|e| Error::InvalidInteger(token, e))?;
853                    scope.stack_pointer += 1;
854                    block!()
855                        .at(token_range(token, self.source))
856                        .push(Instruction::PushI64(integer));
857                }
858                Node::String(string_token) => {
859                    // trim starting and ending quotes.
860                    // adjust this if additional string formats are added.
861                    let string = self.create_string(
862                        string_token
863                            .resolve()
864                            .map_err(|e| Error::InvalidString(string_token, e))?,
865                    )?;
866                    scope.stack_pointer += 1;
867                    block!()
868                        .at(token_range(string_token, self.source))
869                        .push(Instruction::PushString(string));
870                }
871                Node::Variable(token) => {
872                    let value = scope
873                        .get(
874                            &token
875                                .resolve()
876                                .map_err(|e| Error::InvalidIdentifier(token, e))?,
877                        )
878                        .ok_or(Error::UndefinedSymbol(token))?;
879                    scope.stack_pointer += 1;
880                    block!()
881                        .at(token_range(token, self.source))
882                        .push(Instruction::Clone(value));
883                }
884                Node::Add(t) => binop!(Instruction::Add, t),
885                Node::Sub(t) => binop!(Instruction::Sub, t),
886                Node::Mul(t) => binop!(Instruction::Mul, t),
887                Node::Div(t) => binop!(Instruction::Div, t),
888                Node::Tuple(t) => binop!(Instruction::Tuple, t),
889                Node::Call(t) => binop!(Instruction::Call, t),
890                Node::Pipe(t) => binop!(Instruction::Pipe, t),
891                Node::BitwiseAnd(t) => binop!(Instruction::BitwiseAnd, t),
892                Node::BitwiseOr(t) => binop!(Instruction::BitwiseOr, t),
893                Node::BitwiseXor(t) => binop!(Instruction::BitwiseXor, t),
894                Node::EqualTo(t) => binop!(Instruction::EqualTo, t),
895                Node::NotEqualTo(t) => binop!(Instruction::NotEqualTo, t),
896                Node::Greater(t) => binop!(Instruction::Greater, t),
897                Node::GreaterEqual(t) => binop!(Instruction::GreaterEqual, t),
898                Node::Lesser(t) => binop!(Instruction::Lesser, t),
899                Node::LesserEqual(t) => binop!(Instruction::LesserEqual, t),
900                Node::LogicalAnd(t) => binop!(Instruction::LogicalAnd, t),
901                Node::LogicalOr(t) => binop!(Instruction::LogicalOr, t),
902                // Unaries
903                // These do not move the stack,
904                // and say "+= 0" to make this clear.
905                Node::Positive(_) => {
906                    scope.stack_pointer += 0;
907                    // + is a no-op
908                }
909                Node::Negative(t) => {
910                    scope.stack_pointer += 0;
911                    block!()
912                        .at(token_range(t, self.source))
913                        .push(Instruction::Negative);
914                }
915                Node::Annotation(_) => {
916                    scope.stack_pointer += 0;
917                    // annotations are a no-op
918                }
919                Node::Deref(t) => {
920                    scope.stack_pointer += 0;
921                    block!()
922                        .at(token_range(t, self.source))
923                        .push(Instruction::Deref);
924                }
925                Node::Name { name, colon_token } => {
926                    scope.stack_pointer += 0;
927                    let range = token_range(colon_token, self.source);
928                    if name.lexigram == Lexigram::Discard {
929                        block!().at(range).push(Instruction::Nest);
930                    } else {
931                        let s = self.create_string(
932                            name.resolve()
933                                .map_err(|e| Error::InvalidIdentifier(name, e))?,
934                        )?;
935                        block!().at(range).push(Instruction::Name(s));
936                    }
937                }
938
939                Node::Block(block) => {
940                    self.add_block(block_id, block, scope.child())?;
941                    scope.stack_pointer += 1;
942                }
943                Node::Bool(boolean, t) => {
944                    scope.stack_pointer += 1;
945                    block!().at(token_range(t, self.source)).push(if boolean {
946                        Instruction::PushTrue
947                    } else {
948                        Instruction::PushFalse
949                    });
950                }
951                Node::Bind(t) => {
952                    let range = token_range(t, self.source);
953                    if let Some(bind) = scope.get_bind() {
954                        block!()
955                            .at(range)
956                            .push(Instruction::Clone(bind))
957                            .push(Instruction::Bind);
958                        scope.stack_pointer += 0;
959                    } else {
960                        block!().at(range).push(Instruction::Yield);
961                    }
962                }
963                Node::If(if_block) => {
964                    try_validate(if_block.diagnostics)?;
965                    let range = token_range(if_block.if_token, self.source);
966                    self.add_expression(block_id, if_block.condition, scope)?;
967                    block!().at(range).push(Instruction::If(0));
968                    scope.stack_pointer -= 1;
969                    let if_destination = block!().len() - size_of::<ProgramCounter>();
970                    self.add_block(block_id, if_block.first, scope.child())?;
971
972                    // the first block always returns a value (though it may be implicit unit)
973                    // so there always needs to be an else block with some value as well,
974                    // but an empty else block will merely return unit.
975
976                    // This jump is the last instruction of the first block--
977                    // it skips over else.
978                    block!().at(range).push(Instruction::Jump(0));
979                    let jump_destination = block!().len() - size_of::<ProgramCounter>();
980
981                    // Now that the first block is complete,
982                    // we can fill in the conditional jump's destination.
983                    resolve_jump(&mut block!(), if_destination);
984                    self.add_block(block_id, if_block.second, scope.child())?;
985
986                    resolve_jump(&mut block!(), jump_destination);
987                    scope.stack_pointer += 1;
988                }
989                Node::Field {
990                    dot_token: _,
991                    index,
992                } => {
993                    let instruction = match index {
994                        Token {
995                            lexigram: Lexigram::Ident,
996                            ..
997                        } => {
998                            let s = self.create_string(
999                                index
1000                                    .resolve()
1001                                    .map_err(|e| Error::InvalidIdentifier(index, e))?,
1002                            )?;
1003                            Instruction::PushString(s)
1004                        }
1005                        Token {
1006                            lexigram: Lexigram::Number,
1007                            origin,
1008                        } => {
1009                            let integer = origin
1010                                .parse()
1011                                .map_err(|e| Error::InvalidInteger(index, e))?;
1012                            Instruction::PushI64(integer)
1013                        }
1014                        _ => {
1015                            panic!("expected an identifier or number in field index, got {index:?}")
1016                        }
1017                    };
1018                    block!()
1019                        .at(token_range(index, self.source))
1020                        .push(instruction)
1021                        .push(Instruction::Index);
1022                    // the stack pointer does not move by the end,
1023                    // because while Index pops twice, we performed one of the pushes ourselves.
1024                    scope.stack_pointer += 0;
1025                }
1026                Node::Enum(enumeration) => {
1027                    try_validate(enumeration.diagnostics)?;
1028                    self.add_expression(block_id, enumeration.variants, scope)?;
1029                    // at this point, the stack has grown by 2 + statics.
1030                    // however only the variants (bottom of the stack) are accounted for in our scope,
1031                    // and the resulting enum will replace it.
1032                    scope.stack_pointer += 0;
1033                    self.blocks[block_id as usize]
1034                        .at(token_range(enumeration.enum_token, self.source))
1035                        .push(Instruction::PushEnum);
1036                }
1037                Node::Match(match_expression) => {
1038                    let (_match_token, expression, _then_token, _end_token, diagnostics, cases) =
1039                        Match::destructure(match_expression);
1040                    try_validate(diagnostics)?;
1041                    let subject = scope.stack_pointer;
1042                    self.add_expression(block_id, expression, scope)?;
1043                    let jump_locations = cases
1044                        .map(|case| {
1045                            let scope = &mut scope.child();
1046                            // There isn't really a good place to anchor the generated instructions to,
1047                            // but i think it's best to use something.
1048                            let range = token_range(
1049                                case.arrow_token
1050                                    .expect("valid enum cases should have an arrow token"),
1051                                self.source,
1052                            );
1053                            self.blocks[block_id as usize]
1054                                .at(range)
1055                                .push(Instruction::Clone(subject));
1056                            scope.stack_pointer += 1;
1057                            self.add_expression(block_id, case.case, scope)?;
1058                            self.blocks[block_id as usize]
1059                                .at(range)
1060                                .push(Instruction::Matches)
1061                                .push(Instruction::If(0));
1062                            scope.stack_pointer -= 2;
1063                            let if_location =
1064                                self.blocks[block_id as usize].len() - size_of::<ProgramCounter>();
1065
1066                            assert_eq!(scope.stack_pointer - 1, subject);
1067                            if let Some(binding) = case.binding {
1068                                self.add_binding(block_id, binding, scope)?;
1069                            }
1070                            self.add_expression(block_id, case.expression, scope)?;
1071                            self.blocks[block_id as usize]
1072                                .at(range)
1073                                .push(Instruction::Collapse(subject))
1074                                .push(Instruction::Jump(0));
1075                            resolve_jump(&mut self.blocks[block_id as usize], if_location);
1076                            Ok(self.blocks[block_id as usize].len() - size_of::<ProgramCounter>())
1077                        })
1078                        .collect::<Result<Box<[usize]>, Error>>()?;
1079                    for jump_location in jump_locations {
1080                        resolve_jump(&mut self.blocks[block_id as usize], jump_location);
1081                    }
1082                }
1083            }
1084        }
1085        Ok(())
1086    }
1087}
1088
1089#[derive(Default)]
1090struct Scope<'parent, 'source> {
1091    parent: Option<&'parent Scope<'parent, 'source>>,
1092
1093    stack_pointer: StackPointer,
1094    bindings: Vec<(Cow<'source, str>, StackPointer)>,
1095    bind_function: Option<StackPointer>,
1096}
1097
1098impl<'parent, 'source> Scope<'parent, 'source> {
1099    fn get(&self, k: &'source str) -> Option<StackPointer> {
1100        self.bindings
1101            .iter()
1102            // Use most recent binding
1103            .rev()
1104            .find(|(x, _)| *x == k)
1105            .map(|(_, x)| x)
1106            .copied()
1107            .or_else(|| {
1108                self.parent
1109                    .and_then(|parent| parent.get(k))
1110                    .or_else(|| builtins::from_str(k))
1111            })
1112    }
1113
1114    fn insert(&mut self, k: impl Into<Cow<'source, str>>) {
1115        self.bindings.push((
1116            k.into(),
1117            self.stack_pointer
1118                .checked_sub(1)
1119                .expect("attempted to assign variable while stack was empty"),
1120        ));
1121    }
1122
1123    fn get_bind(&self) -> Option<StackPointer> {
1124        self.bind_function
1125            .or_else(|| self.parent.and_then(|parent| parent.get_bind()))
1126    }
1127
1128    fn child(&'parent self) -> Self {
1129        Self {
1130            parent: Some(self),
1131            stack_pointer: self.stack_pointer,
1132            ..Default::default()
1133        }
1134    }
1135
1136    /// Removes all variable bindings from this scope
1137    /// and creates a new scope which begins with them.
1138    ///
1139    /// The new scope will not inherit the original scope's bind function.
1140    fn promote(&mut self) -> Self {
1141        let lost_size = self.parent.map_or(0, |x| x.stack_pointer);
1142        let mut new_bindings = Vec::new();
1143        mem::swap(&mut self.bindings, &mut new_bindings);
1144        for binding in &mut new_bindings {
1145            binding.1 -= lost_size;
1146        }
1147        Self {
1148            parent: None,
1149            stack_pointer: self.stack_pointer - lost_size,
1150            bindings: new_bindings,
1151            bind_function: None,
1152        }
1153    }
1154}