nu_engine/compile/
builder.rs

1use nu_protocol::{
2    CompileError, IntoSpanned, RegId, Span, Spanned,
3    ast::Pattern,
4    ir::{DataSlice, Instruction, IrAstRef, IrBlock, Literal},
5};
6
7/// A label identifier. Only exists while building code. Replaced with the actual target.
8#[derive(Debug, Clone, Copy, PartialEq, Eq)]
9pub(crate) struct LabelId(pub usize);
10
11/// Builds [`IrBlock`]s progressively by consuming instructions and handles register allocation.
12#[derive(Debug)]
13pub(crate) struct BlockBuilder {
14    pub(crate) block_span: Option<Span>,
15    pub(crate) instructions: Vec<Instruction>,
16    pub(crate) spans: Vec<Span>,
17    /// The actual instruction index that a label refers to. While building IR, branch targets are
18    /// specified as indices into this array rather than the true instruction index. This makes it
19    /// easier to make modifications to code, as just this array needs to be changed, and it's also
20    /// less error prone as during `finish()` we check to make sure all of the used labels have had
21    /// an index actually set.
22    pub(crate) labels: Vec<Option<usize>>,
23    pub(crate) data: Vec<u8>,
24    pub(crate) ast: Vec<Option<IrAstRef>>,
25    pub(crate) comments: Vec<String>,
26    pub(crate) register_allocation_state: Vec<bool>,
27    pub(crate) file_count: u32,
28    pub(crate) context_stack: ContextStack,
29}
30
31impl BlockBuilder {
32    /// Starts a new block, with the first register (`%0`) allocated as input.
33    pub(crate) fn new(block_span: Option<Span>) -> Self {
34        BlockBuilder {
35            block_span,
36            instructions: vec![],
37            spans: vec![],
38            labels: vec![],
39            data: vec![],
40            ast: vec![],
41            comments: vec![],
42            register_allocation_state: vec![true],
43            file_count: 0,
44            context_stack: ContextStack::new(),
45        }
46    }
47
48    /// Get the next unused register for code generation.
49    pub(crate) fn next_register(&mut self) -> Result<RegId, CompileError> {
50        if let Some(index) = self
51            .register_allocation_state
52            .iter_mut()
53            .position(|is_allocated| {
54                if !*is_allocated {
55                    *is_allocated = true;
56                    true
57                } else {
58                    false
59                }
60            })
61        {
62            Ok(RegId::new(index as u32))
63        } else if self.register_allocation_state.len() < (u32::MAX as usize - 2) {
64            let reg_id = RegId::new(self.register_allocation_state.len() as u32);
65            self.register_allocation_state.push(true);
66            Ok(reg_id)
67        } else {
68            Err(CompileError::RegisterOverflow {
69                block_span: self.block_span,
70            })
71        }
72    }
73
74    /// Check if a register is initialized with a value.
75    pub(crate) fn is_allocated(&self, reg_id: RegId) -> bool {
76        self.register_allocation_state
77            .get(reg_id.get() as usize)
78            .is_some_and(|state| *state)
79    }
80
81    /// Mark a register as initialized.
82    pub(crate) fn mark_register(&mut self, reg_id: RegId) -> Result<(), CompileError> {
83        if let Some(is_allocated) = self
84            .register_allocation_state
85            .get_mut(reg_id.get() as usize)
86        {
87            *is_allocated = true;
88            Ok(())
89        } else {
90            Err(CompileError::RegisterOverflow {
91                block_span: self.block_span,
92            })
93        }
94    }
95
96    /// Mark a register as empty, so that it can be used again by something else.
97    #[track_caller]
98    pub(crate) fn free_register(&mut self, reg_id: RegId) -> Result<(), CompileError> {
99        let index = reg_id.get() as usize;
100
101        if self
102            .register_allocation_state
103            .get(index)
104            .is_some_and(|is_allocated| *is_allocated)
105        {
106            self.register_allocation_state[index] = false;
107            Ok(())
108        } else {
109            log::warn!("register {reg_id} uninitialized, builder = {self:#?}");
110            Err(CompileError::RegisterUninitialized {
111                reg_id,
112                caller: std::panic::Location::caller().to_string(),
113            })
114        }
115    }
116
117    /// Define a label, which can be used by branch instructions. The target can optionally be
118    /// specified now.
119    pub(crate) fn label(&mut self, target_index: Option<usize>) -> LabelId {
120        let label_id = self.labels.len();
121        self.labels.push(target_index);
122        LabelId(label_id)
123    }
124
125    /// Change the target of a label.
126    pub(crate) fn set_label(
127        &mut self,
128        label_id: LabelId,
129        target_index: usize,
130    ) -> Result<(), CompileError> {
131        *self
132            .labels
133            .get_mut(label_id.0)
134            .ok_or(CompileError::UndefinedLabel {
135                label_id: label_id.0,
136                span: None,
137            })? = Some(target_index);
138        Ok(())
139    }
140
141    /// Insert an instruction into the block, automatically marking any registers populated by
142    /// the instruction, and freeing any registers consumed by the instruction.
143    #[track_caller]
144    pub(crate) fn push(&mut self, instruction: Spanned<Instruction>) -> Result<(), CompileError> {
145        // Free read registers, and mark write registers.
146        //
147        // If a register is both read and written, it should be on both sides, so that we can verify
148        // that the register was in the right state beforehand.
149        let mut allocate = |read: &[RegId], write: &[RegId]| -> Result<(), CompileError> {
150            for reg in read {
151                self.free_register(*reg)?;
152            }
153            for reg in write {
154                self.mark_register(*reg)?;
155            }
156            Ok(())
157        };
158
159        let allocate_result = match &instruction.item {
160            Instruction::Unreachable => Ok(()),
161            Instruction::LoadLiteral { dst, lit } => {
162                allocate(&[], &[*dst]).and(
163                    // Free any registers on the literal
164                    match lit {
165                        Literal::Range {
166                            start,
167                            step,
168                            end,
169                            inclusion: _,
170                        } => allocate(&[*start, *step, *end], &[]),
171                        Literal::Bool(_)
172                        | Literal::Int(_)
173                        | Literal::Float(_)
174                        | Literal::Filesize(_)
175                        | Literal::Duration(_)
176                        | Literal::Binary(_)
177                        | Literal::Block(_)
178                        | Literal::Closure(_)
179                        | Literal::RowCondition(_)
180                        | Literal::List { capacity: _ }
181                        | Literal::Record { capacity: _ }
182                        | Literal::Filepath {
183                            val: _,
184                            no_expand: _,
185                        }
186                        | Literal::Directory {
187                            val: _,
188                            no_expand: _,
189                        }
190                        | Literal::GlobPattern {
191                            val: _,
192                            no_expand: _,
193                        }
194                        | Literal::String(_)
195                        | Literal::RawString(_)
196                        | Literal::CellPath(_)
197                        | Literal::Date(_)
198                        | Literal::Nothing => Ok(()),
199                    },
200                )
201            }
202            Instruction::LoadValue { dst, val: _ } => allocate(&[], &[*dst]),
203            Instruction::Move { dst, src } => allocate(&[*src], &[*dst]),
204            Instruction::Clone { dst, src } => allocate(&[*src], &[*dst, *src]),
205            Instruction::Collect { src_dst } => allocate(&[*src_dst], &[*src_dst]),
206            Instruction::Span { src_dst } => allocate(&[*src_dst], &[*src_dst]),
207            Instruction::Drop { src } => allocate(&[*src], &[]),
208            Instruction::Drain { src } => allocate(&[*src], &[]),
209            Instruction::DrainIfEnd { src } => allocate(&[*src], &[]),
210            Instruction::LoadVariable { dst, var_id: _ } => allocate(&[], &[*dst]),
211            Instruction::StoreVariable { var_id: _, src } => allocate(&[*src], &[]),
212            Instruction::DropVariable { var_id: _ } => Ok(()),
213            Instruction::LoadEnv { dst, key: _ } => allocate(&[], &[*dst]),
214            Instruction::LoadEnvOpt { dst, key: _ } => allocate(&[], &[*dst]),
215            Instruction::StoreEnv { key: _, src } => allocate(&[*src], &[]),
216            Instruction::PushPositional { src } => allocate(&[*src], &[]),
217            Instruction::AppendRest { src } => allocate(&[*src], &[]),
218            Instruction::PushFlag { name: _ } => Ok(()),
219            Instruction::PushShortFlag { short: _ } => Ok(()),
220            Instruction::PushNamed { name: _, src } => allocate(&[*src], &[]),
221            Instruction::PushShortNamed { short: _, src } => allocate(&[*src], &[]),
222            Instruction::PushParserInfo { name: _, info: _ } => Ok(()),
223            Instruction::RedirectOut { mode: _ } => Ok(()),
224            Instruction::RedirectErr { mode: _ } => Ok(()),
225            Instruction::CheckErrRedirected { src } => allocate(&[*src], &[*src]),
226            Instruction::OpenFile {
227                file_num: _,
228                path,
229                append: _,
230            } => allocate(&[*path], &[]),
231            Instruction::WriteFile { file_num: _, src } => allocate(&[*src], &[]),
232            Instruction::CloseFile { file_num: _ } => Ok(()),
233            Instruction::Call {
234                decl_id: _,
235                src_dst,
236            } => allocate(&[*src_dst], &[*src_dst]),
237            Instruction::StringAppend { src_dst, val } => allocate(&[*src_dst, *val], &[*src_dst]),
238            Instruction::GlobFrom {
239                src_dst,
240                no_expand: _,
241            } => allocate(&[*src_dst], &[*src_dst]),
242            Instruction::ListPush { src_dst, item } => allocate(&[*src_dst, *item], &[*src_dst]),
243            Instruction::ListSpread { src_dst, items } => {
244                allocate(&[*src_dst, *items], &[*src_dst])
245            }
246            Instruction::RecordInsert { src_dst, key, val } => {
247                allocate(&[*src_dst, *key, *val], &[*src_dst])
248            }
249            Instruction::RecordSpread { src_dst, items } => {
250                allocate(&[*src_dst, *items], &[*src_dst])
251            }
252            Instruction::Not { src_dst } => allocate(&[*src_dst], &[*src_dst]),
253            Instruction::BinaryOp {
254                lhs_dst,
255                op: _,
256                rhs,
257            } => allocate(&[*lhs_dst, *rhs], &[*lhs_dst]),
258            Instruction::FollowCellPath { src_dst, path } => {
259                allocate(&[*src_dst, *path], &[*src_dst])
260            }
261            Instruction::CloneCellPath { dst, src, path } => {
262                allocate(&[*src, *path], &[*src, *dst])
263            }
264            Instruction::UpsertCellPath {
265                src_dst,
266                path,
267                new_value,
268            } => allocate(&[*src_dst, *path, *new_value], &[*src_dst]),
269            Instruction::Jump { index: _ } => Ok(()),
270            Instruction::BranchIf { cond, index: _ } => allocate(&[*cond], &[]),
271            Instruction::BranchIfEmpty { src, index: _ } => allocate(&[*src], &[*src]),
272            Instruction::Match {
273                pattern: _,
274                src,
275                index: _,
276            } => allocate(&[*src], &[*src]),
277            Instruction::CheckMatchGuard { src } => allocate(&[*src], &[*src]),
278            Instruction::Iterate {
279                dst,
280                stream,
281                end_index: _,
282            } => allocate(&[*stream], &[*dst, *stream]),
283            Instruction::OnError { index: _ } => Ok(()),
284            Instruction::OnErrorInto { index: _, dst } => allocate(&[], &[*dst]),
285            Instruction::PopErrorHandler => Ok(()),
286            Instruction::ReturnEarly { src } => allocate(&[*src], &[]),
287            Instruction::Return { src } => allocate(&[*src], &[]),
288        };
289
290        // Add more context to the error
291        match allocate_result {
292            Ok(()) => (),
293            Err(CompileError::RegisterUninitialized { reg_id, caller }) => {
294                return Err(CompileError::RegisterUninitializedWhilePushingInstruction {
295                    reg_id,
296                    caller,
297                    instruction: format!("{:?}", instruction.item),
298                    span: instruction.span,
299                });
300            }
301            Err(err) => return Err(err),
302        }
303
304        self.instructions.push(instruction.item);
305        self.spans.push(instruction.span);
306        self.ast.push(None);
307        self.comments.push(String::new());
308        Ok(())
309    }
310
311    /// Set the AST of the last instruction. Separate method because it's rarely used.
312    pub(crate) fn set_last_ast(&mut self, ast_ref: Option<IrAstRef>) {
313        *self.ast.last_mut().expect("no last instruction") = ast_ref;
314    }
315
316    /// Add a comment to the last instruction.
317    pub(crate) fn add_comment(&mut self, comment: impl std::fmt::Display) {
318        add_comment(
319            self.comments.last_mut().expect("no last instruction"),
320            comment,
321        )
322    }
323
324    /// Load a register with a literal.
325    pub(crate) fn load_literal(
326        &mut self,
327        reg_id: RegId,
328        literal: Spanned<Literal>,
329    ) -> Result<(), CompileError> {
330        self.push(
331            Instruction::LoadLiteral {
332                dst: reg_id,
333                lit: literal.item,
334            }
335            .into_spanned(literal.span),
336        )?;
337        Ok(())
338    }
339
340    /// Allocate a new register and load a literal into it.
341    pub(crate) fn literal(&mut self, literal: Spanned<Literal>) -> Result<RegId, CompileError> {
342        let reg_id = self.next_register()?;
343        self.load_literal(reg_id, literal)?;
344        Ok(reg_id)
345    }
346
347    /// Deallocate a register and set it to `Empty`, if it is allocated
348    pub(crate) fn drop_reg(&mut self, reg_id: RegId) -> Result<(), CompileError> {
349        if self.is_allocated(reg_id) {
350            // try using the block Span if available, since that's slightly more helpful than Span::unknown
351            let span = self.block_span.unwrap_or(Span::unknown());
352            self.push(Instruction::Drop { src: reg_id }.into_spanned(span))?;
353        }
354        Ok(())
355    }
356
357    /// Set a register to `Empty`, but mark it as in-use, e.g. for input
358    pub(crate) fn load_empty(&mut self, reg_id: RegId) -> Result<(), CompileError> {
359        self.drop_reg(reg_id)?;
360        self.mark_register(reg_id)
361    }
362
363    /// Drain the stream in a register (fully consuming it)
364    pub(crate) fn drain(&mut self, src: RegId, span: Span) -> Result<(), CompileError> {
365        self.push(Instruction::Drain { src }.into_spanned(span))
366    }
367
368    /// Add data to the `data` array and return a [`DataSlice`] referencing it.
369    pub(crate) fn data(&mut self, data: impl AsRef<[u8]>) -> Result<DataSlice, CompileError> {
370        let data = data.as_ref();
371        let start = self.data.len();
372        if data.is_empty() {
373            Ok(DataSlice::empty())
374        } else if start + data.len() < u32::MAX as usize {
375            let slice = DataSlice {
376                start: start as u32,
377                len: data.len() as u32,
378            };
379            self.data.extend_from_slice(data);
380            Ok(slice)
381        } else {
382            Err(CompileError::DataOverflow {
383                block_span: self.block_span,
384            })
385        }
386    }
387
388    /// Clone a register with a `clone` instruction.
389    pub(crate) fn clone_reg(&mut self, src: RegId, span: Span) -> Result<RegId, CompileError> {
390        let dst = self.next_register()?;
391        self.push(Instruction::Clone { dst, src }.into_spanned(span))?;
392        Ok(dst)
393    }
394
395    /// Add a `branch-if` instruction
396    pub(crate) fn branch_if(
397        &mut self,
398        cond: RegId,
399        label_id: LabelId,
400        span: Span,
401    ) -> Result<(), CompileError> {
402        self.push(
403            Instruction::BranchIf {
404                cond,
405                index: label_id.0,
406            }
407            .into_spanned(span),
408        )
409    }
410
411    /// Add a `branch-if-empty` instruction
412    pub(crate) fn branch_if_empty(
413        &mut self,
414        src: RegId,
415        label_id: LabelId,
416        span: Span,
417    ) -> Result<(), CompileError> {
418        self.push(
419            Instruction::BranchIfEmpty {
420                src,
421                index: label_id.0,
422            }
423            .into_spanned(span),
424        )
425    }
426
427    /// Add a `jump` instruction
428    pub(crate) fn jump(&mut self, label_id: LabelId, span: Span) -> Result<(), CompileError> {
429        self.push(Instruction::Jump { index: label_id.0 }.into_spanned(span))
430    }
431
432    /// Add a `match` instruction
433    pub(crate) fn r#match(
434        &mut self,
435        pattern: Pattern,
436        src: RegId,
437        label_id: LabelId,
438        span: Span,
439    ) -> Result<(), CompileError> {
440        self.push(
441            Instruction::Match {
442                pattern: Box::new(pattern),
443                src,
444                index: label_id.0,
445            }
446            .into_spanned(span),
447        )
448    }
449
450    /// The index that the next instruction [`.push()`](Self::push)ed will have.
451    pub(crate) fn here(&self) -> usize {
452        self.instructions.len()
453    }
454
455    /// Allocate a new file number, for redirection.
456    pub(crate) fn next_file_num(&mut self) -> Result<u32, CompileError> {
457        let next = self.file_count;
458        self.file_count = self
459            .file_count
460            .checked_add(1)
461            .ok_or(CompileError::FileOverflow {
462                block_span: self.block_span,
463            })?;
464        Ok(next)
465    }
466
467    /// Push a new loop state onto the builder. Creates new labels that must be set.
468    pub(crate) fn begin_loop(&mut self) -> Loop {
469        let loop_ = Loop {
470            break_label: self.label(None),
471            continue_label: self.label(None),
472        };
473        self.context_stack.push_loop(loop_);
474        loop_
475    }
476
477    /// True if we are currently in a loop.
478    pub(crate) fn is_in_loop(&self) -> bool {
479        self.context_stack.is_in_loop()
480    }
481
482    /// Add a loop breaking jump instruction.
483    pub(crate) fn push_break(&mut self, span: Span) -> Result<(), CompileError> {
484        let loop_ = self
485            .context_stack
486            .current_loop()
487            .ok_or_else(|| CompileError::NotInALoop {
488                msg: "`break` called from outside of a loop".into(),
489                span: Some(span),
490            })?;
491        self.jump(loop_.break_label, span)
492    }
493
494    /// Add a loop continuing jump instruction.
495    pub(crate) fn push_continue(&mut self, span: Span) -> Result<(), CompileError> {
496        let loop_ = self
497            .context_stack
498            .current_loop()
499            .ok_or_else(|| CompileError::NotInALoop {
500                msg: "`continue` called from outside of a loop".into(),
501                span: Some(span),
502            })?;
503        self.jump(loop_.continue_label, span)
504    }
505
506    /// Pop the loop state. Checks that the loop being ended is the same one that was expected.
507    pub(crate) fn end_loop(&mut self, loop_: Loop) -> Result<(), CompileError> {
508        let context_block = self
509            .context_stack
510            .pop()
511            .ok_or_else(|| CompileError::NotInALoop {
512                msg: "end_loop() called outside of a loop".into(),
513                span: None,
514            })?;
515
516        match context_block {
517            ContextBlock::Loop(ended_loop) if ended_loop == loop_ => Ok(()),
518            _ => Err(CompileError::IncoherentLoopState {
519                block_span: self.block_span,
520            }),
521        }
522    }
523
524    /// Mark an unreachable code path. Produces an error at runtime if executed.
525    #[allow(dead_code)] // currently unused, but might be used in the future.
526    pub(crate) fn unreachable(&mut self, span: Span) -> Result<(), CompileError> {
527        self.push(Instruction::Unreachable.into_spanned(span))
528    }
529
530    /// Consume the builder and produce the final [`IrBlock`].
531    pub(crate) fn finish(mut self) -> Result<IrBlock, CompileError> {
532        // Add comments to label targets
533        for (index, label_target) in self.labels.iter().enumerate() {
534            if let Some(label_target) = label_target {
535                add_comment(
536                    &mut self.comments[*label_target],
537                    format_args!("label({index})"),
538                );
539            }
540        }
541
542        // Populate the actual target indices of labels into the instructions
543        for ((index, instruction), span) in
544            self.instructions.iter_mut().enumerate().zip(&self.spans)
545        {
546            if let Some(label_id) = instruction.branch_target() {
547                let target_index = self.labels.get(label_id).cloned().flatten().ok_or(
548                    CompileError::UndefinedLabel {
549                        label_id,
550                        span: Some(*span),
551                    },
552                )?;
553                // Add a comment to the target index that we come from here
554                add_comment(
555                    &mut self.comments[target_index],
556                    format_args!("from({index}:)"),
557                );
558                instruction.set_branch_target(target_index).map_err(|_| {
559                    CompileError::SetBranchTargetOfNonBranchInstruction {
560                        instruction: format!("{instruction:?}"),
561                        span: *span,
562                    }
563                })?;
564            }
565        }
566
567        Ok(IrBlock {
568            instructions: self.instructions,
569            spans: self.spans,
570            data: self.data.into(),
571            ast: self.ast,
572            comments: self.comments.into_iter().map(|s| s.into()).collect(),
573            register_count: self
574                .register_allocation_state
575                .len()
576                .try_into()
577                .expect("register count overflowed in finish() despite previous checks"),
578            file_count: self.file_count,
579        })
580    }
581
582    pub(crate) fn begin_try(&mut self) {
583        self.context_stack.push_try();
584    }
585
586    pub(crate) fn end_try(&mut self) -> Result<(), CompileError> {
587        match self.context_stack.pop() {
588            Some(ContextBlock::Try) => Ok(()),
589            _ => Err(CompileError::NotInATry {
590                msg: "end_try() called outside of a try block".into(),
591                span: None,
592            }),
593        }
594    }
595}
596
597/// Keeps track of the `break` and `continue` target labels for a loop.
598#[derive(Debug, Clone, Copy, PartialEq, Eq)]
599pub(crate) struct Loop {
600    pub(crate) break_label: LabelId,
601    pub(crate) continue_label: LabelId,
602}
603
604/// Blocks that modify/define behavior for the instructions they contain.
605#[derive(Debug, Clone, Copy, PartialEq, Eq)]
606pub(crate) enum ContextBlock {
607    Loop(Loop),
608    Try,
609}
610
611#[derive(Debug, Clone)]
612pub(crate) struct ContextStack(Vec<ContextBlock>);
613
614impl ContextStack {
615    pub const fn new() -> Self {
616        Self(Vec::new())
617    }
618
619    pub fn push_loop(&mut self, r#loop: Loop) {
620        self.0.push(ContextBlock::Loop(r#loop));
621    }
622
623    pub fn push_try(&mut self) {
624        self.0.push(ContextBlock::Try);
625    }
626
627    pub fn pop(&mut self) -> Option<ContextBlock> {
628        self.0.pop()
629    }
630
631    pub fn current_loop(&self) -> Option<&Loop> {
632        self.0.iter().rev().find_map(|cb| match cb {
633            ContextBlock::Loop(r#loop) => Some(r#loop),
634            _ => None,
635        })
636    }
637
638    pub fn try_block_depth_from_loop(&self) -> usize {
639        self.0
640            .iter()
641            .rev()
642            .take_while(|&cb| matches!(cb, ContextBlock::Try))
643            .count()
644    }
645
646    pub fn is_in_loop(&self) -> bool {
647        self.current_loop().is_some()
648    }
649}
650
651/// Add a new comment to an existing one
652fn add_comment(comment: &mut String, new_comment: impl std::fmt::Display) {
653    use std::fmt::Write;
654    write!(
655        comment,
656        "{}{}",
657        if comment.is_empty() { "" } else { ", " },
658        new_comment
659    )
660    .expect("formatting failed");
661}