Skip to main content

nu_protocol/ir/
mod.rs

1use crate::{
2    BlockId, DeclId, Filesize, RegId, ShellError, Span, Value, VarId,
3    ast::{CellPath, Expression, Operator, Pattern, RangeInclusion},
4    engine::EngineState,
5};
6use chrono::{DateTime, FixedOffset};
7use serde::{Deserialize, Serialize};
8use std::{fmt, sync::Arc};
9
10mod call;
11mod display;
12
13pub use call::*;
14pub use display::{FmtInstruction, FmtIrBlock};
15
16#[derive(Clone, Serialize, Deserialize)]
17pub struct IrBlock {
18    pub instructions: Vec<Instruction>,
19    pub spans: Vec<Span>,
20    #[serde(with = "serde_arc_u8_array")]
21    pub data: Arc<[u8]>,
22    pub ast: Vec<Option<IrAstRef>>,
23    /// Additional information that can be added to help with debugging
24    pub comments: Vec<Box<str>>,
25    pub register_count: u32,
26    pub file_count: u32,
27}
28
29impl fmt::Debug for IrBlock {
30    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
31        // the ast field is too verbose and doesn't add much
32        f.debug_struct("IrBlock")
33            .field("instructions", &self.instructions)
34            .field("spans", &self.spans)
35            .field("data", &self.data)
36            .field("comments", &self.comments)
37            .field("register_count", &self.register_count)
38            .field("file_count", &self.file_count)
39            .finish_non_exhaustive()
40    }
41}
42
43impl IrBlock {
44    /// Returns a value that can be formatted with [`Display`](std::fmt::Display) to show a detailed
45    /// listing of the instructions contained within this [`IrBlock`].
46    pub fn display<'a>(&'a self, engine_state: &'a EngineState) -> FmtIrBlock<'a> {
47        FmtIrBlock {
48            engine_state,
49            ir_block: self,
50        }
51    }
52}
53
54/// A slice into the `data` array of a block. This is a compact and cache-friendly way to store
55/// string data that a block uses.
56#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
57pub struct DataSlice {
58    pub start: u32,
59    pub len: u32,
60}
61
62impl DataSlice {
63    /// A data slice that contains no data. This slice is always valid.
64    pub const fn empty() -> DataSlice {
65        DataSlice { start: 0, len: 0 }
66    }
67}
68
69impl std::ops::Index<DataSlice> for [u8] {
70    type Output = [u8];
71
72    fn index(&self, index: DataSlice) -> &Self::Output {
73        &self[index.start as usize..(index.start as usize + index.len as usize)]
74    }
75}
76
77/// A possible reference into the abstract syntax tree for an instruction. This is not present for
78/// most instructions and is just added when needed.
79#[derive(Debug, Clone)]
80pub struct IrAstRef(pub Arc<Expression>);
81
82impl Serialize for IrAstRef {
83    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
84    where
85        S: serde::Serializer,
86    {
87        self.0.as_ref().serialize(serializer)
88    }
89}
90
91impl<'de> Deserialize<'de> for IrAstRef {
92    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
93    where
94        D: serde::Deserializer<'de>,
95    {
96        Expression::deserialize(deserializer).map(|expr| IrAstRef(Arc::new(expr)))
97    }
98}
99
100#[derive(Debug, Clone, Serialize, Deserialize)]
101pub enum Instruction {
102    /// Unreachable code path (error)
103    Unreachable,
104    /// Load a literal value into the `dst` register
105    LoadLiteral { dst: RegId, lit: Literal },
106    /// Load a clone of a boxed value into the `dst` register (e.g. from const evaluation)
107    LoadValue { dst: RegId, val: Box<Value> },
108    /// Move a register. Value is taken from `src` (used by this instruction).
109    Move { dst: RegId, src: RegId },
110    /// Copy a register (must be a collected value). Value is still in `src` after this instruction.
111    Clone { dst: RegId, src: RegId },
112    /// Collect a stream in a register to a value
113    Collect { src_dst: RegId },
114    /// Change the span of the contents of a register to the span of this instruction.
115    Span { src_dst: RegId },
116    /// Drop the value/stream in a register, without draining
117    Drop { src: RegId },
118    /// Drain the value/stream in a register and discard (e.g. semicolon).
119    ///
120    /// If passed a stream from an external command, sets $env.LAST_EXIT_CODE to the resulting exit
121    /// code, and invokes any available error handler with Empty, or if not available, returns an
122    /// exit-code-only stream, leaving the block.
123    Drain { src: RegId },
124    /// Drain the value/stream in a register and discard only if this is the last pipeline element.
125    // TODO: see if it's possible to remove this
126    DrainIfEnd { src: RegId },
127    /// Load the value of a variable into the `dst` register
128    LoadVariable { dst: RegId, var_id: VarId },
129    /// Store the value of a variable from the `src` register
130    StoreVariable { var_id: VarId, src: RegId },
131    /// Remove a variable from the stack, freeing up whatever resources were associated with it
132    DropVariable { var_id: VarId },
133    /// Load the value of an environment variable into the `dst` register
134    LoadEnv { dst: RegId, key: DataSlice },
135    /// Load the value of an environment variable into the `dst` register, or `Nothing` if it
136    /// doesn't exist
137    LoadEnvOpt { dst: RegId, key: DataSlice },
138    /// Store the value of an environment variable from the `src` register
139    StoreEnv { key: DataSlice, src: RegId },
140    /// Add a positional arg to the next (internal) call.
141    PushPositional { src: RegId },
142    /// Add a list of args to the next (internal) call (spread/rest).
143    AppendRest { src: RegId },
144    /// Add a named arg with no value to the next (internal) call.
145    PushFlag { name: DataSlice },
146    /// Add a short named arg with no value to the next (internal) call.
147    PushShortFlag { short: DataSlice },
148    /// Add a named arg with a value to the next (internal) call.
149    PushNamed { name: DataSlice, src: RegId },
150    /// Add a short named arg with a value to the next (internal) call.
151    PushShortNamed { short: DataSlice, src: RegId },
152    /// Add parser info to the next (internal) call.
153    PushParserInfo {
154        name: DataSlice,
155        info: Box<Expression>,
156    },
157    /// Set the redirection for stdout for the next call (only).
158    ///
159    /// The register for a file redirection is not consumed.
160    RedirectOut { mode: RedirectMode },
161    /// Set the redirection for stderr for the next call (only).
162    ///
163    /// The register for a file redirection is not consumed.
164    RedirectErr { mode: RedirectMode },
165    /// Throw an error if stderr wasn't redirected in the given stream. `src` is preserved.
166    CheckErrRedirected { src: RegId },
167    /// Open a file for redirection, pushing it onto the file stack.
168    OpenFile {
169        file_num: u32,
170        path: RegId,
171        append: bool,
172    },
173    /// Write data from the register to a file. This is done to finish a file redirection, in case
174    /// an internal command or expression was evaluated rather than an external one.
175    WriteFile { file_num: u32, src: RegId },
176    /// Pop a file used for redirection from the file stack.
177    CloseFile { file_num: u32 },
178    /// Make a call. The input is taken from `src_dst`, and the output is placed in `src_dst`,
179    /// overwriting it. The argument stack is used implicitly and cleared when the call ends.
180    Call { decl_id: DeclId, src_dst: RegId },
181    /// Append a value onto the end of a string. Uses `to_expanded_string(", ", ...)` on the value.
182    /// Used for string interpolation literals. Not the same thing as the `++` operator.
183    StringAppend { src_dst: RegId, val: RegId },
184    /// Convert a string into a glob. Used for glob interpolation and setting glob variables. If the
185    /// value is already a glob, it won't be modified (`no_expand` will have no effect).
186    GlobFrom { src_dst: RegId, no_expand: bool },
187    /// Push a value onto the end of a list. Used to construct list literals.
188    ListPush { src_dst: RegId, item: RegId },
189    /// Spread a value onto the end of a list. Used to construct list literals.
190    ListSpread { src_dst: RegId, items: RegId },
191    /// Insert a key-value pair into a record. Used to construct record literals. Raises an error if
192    /// the key already existed in the record.
193    RecordInsert {
194        src_dst: RegId,
195        key: RegId,
196        val: RegId,
197    },
198    /// Spread a record onto a record. Used to construct record literals. Any existing value for the
199    /// key is overwritten.
200    RecordSpread { src_dst: RegId, items: RegId },
201    /// Negate a boolean.
202    Not { src_dst: RegId },
203    /// Do a binary operation on `lhs_dst` (left) and `rhs` (right) and write the result to
204    /// `lhs_dst`.
205    BinaryOp {
206        lhs_dst: RegId,
207        op: Operator,
208        rhs: RegId,
209    },
210    /// Follow a cell path on the value in `src_dst`, storing the result back to `src_dst`
211    FollowCellPath { src_dst: RegId, path: RegId },
212    /// Clone the value at a cell path in `src`, storing the result to `dst`. The original value
213    /// remains in `src`. Must be a collected value.
214    CloneCellPath { dst: RegId, src: RegId, path: RegId },
215    /// Update/insert a cell path to `new_value` on the value in `src_dst`, storing the modified
216    /// value back to `src_dst`
217    UpsertCellPath {
218        src_dst: RegId,
219        path: RegId,
220        new_value: RegId,
221    },
222    /// Jump to an offset in this block
223    Jump { index: usize },
224    /// Branch to an offset in this block if the value of the `cond` register is a true boolean,
225    /// otherwise continue execution
226    BranchIf { cond: RegId, index: usize },
227    /// Branch to an offset in this block if the value of the `src` register is Empty or Nothing,
228    /// otherwise continue execution. The original value in `src` is preserved.
229    BranchIfEmpty { src: RegId, index: usize },
230    /// Match a pattern on `src`. If the pattern matches, branch to `index` after having set any
231    /// variables captured by the pattern. If the pattern doesn't match, continue execution. The
232    /// original value is preserved in `src` through this instruction.
233    Match {
234        pattern: Box<Pattern>,
235        src: RegId,
236        index: usize,
237    },
238    /// Check that a match guard is a boolean, throwing
239    /// [`MatchGuardNotBool`](crate::ShellError::MatchGuardNotBool) if it isn't. Preserves `src`.
240    CheckMatchGuard { src: RegId },
241    /// Iterate on register `stream`, putting the next value in `dst` if present, or jumping to
242    /// `end_index` if the iterator is finished
243    Iterate {
244        dst: RegId,
245        stream: RegId,
246        end_index: usize,
247    },
248    /// Push an error handler, without capturing the error value
249    OnError { index: usize },
250    /// Push an error handler, capturing the error value into `dst`. If the error handler is not
251    /// called, the register should be freed manually.
252    OnErrorInto { index: usize, dst: RegId },
253    /// Push an finally handler, without capturing the error value
254    Finally { index: usize },
255    /// Push an finally handler, capturing the error value into `dst`. If the finally handler is not
256    /// called, the register should be freed manually.
257    FinallyInto { index: usize, dst: RegId },
258    /// Pop an error handler. This is not necessary when control flow is directed to the error
259    /// handler due to an error.
260    PopErrorHandler,
261    /// Pop an finally handler.
262    PopFinallyRun,
263    /// Return early from the block, raising a `ShellError::Return` instead.
264    ///
265    /// Collecting the value is unavoidable.
266    ReturnEarly { src: RegId },
267    /// Return from the block with the value in the register
268    Return { src: RegId },
269}
270
271impl Instruction {
272    /// Returns a value that can be formatted with [`Display`](std::fmt::Display) to show a detailed
273    /// listing of the instruction.
274    pub fn display<'a>(
275        &'a self,
276        engine_state: &'a EngineState,
277        data: &'a [u8],
278    ) -> FmtInstruction<'a> {
279        FmtInstruction {
280            engine_state,
281            instruction: self,
282            data,
283        }
284    }
285
286    /// Get the output register, for instructions that produce some kind of immediate result.
287    pub fn output_register(&self) -> Option<RegId> {
288        match *self {
289            Instruction::Unreachable => None,
290            Instruction::LoadLiteral { dst, .. } => Some(dst),
291            Instruction::LoadValue { dst, .. } => Some(dst),
292            Instruction::Move { dst, .. } => Some(dst),
293            Instruction::Clone { dst, .. } => Some(dst),
294            Instruction::Collect { src_dst } => Some(src_dst),
295            Instruction::Span { src_dst } => Some(src_dst),
296            Instruction::Drop { .. } => None,
297            Instruction::Drain { .. } => None,
298            Instruction::DrainIfEnd { .. } => None,
299            Instruction::LoadVariable { dst, .. } => Some(dst),
300            Instruction::StoreVariable { .. } => None,
301            Instruction::DropVariable { .. } => None,
302            Instruction::LoadEnv { dst, .. } => Some(dst),
303            Instruction::LoadEnvOpt { dst, .. } => Some(dst),
304            Instruction::StoreEnv { .. } => None,
305            Instruction::PushPositional { .. } => None,
306            Instruction::AppendRest { .. } => None,
307            Instruction::PushFlag { .. } => None,
308            Instruction::PushShortFlag { .. } => None,
309            Instruction::PushNamed { .. } => None,
310            Instruction::PushShortNamed { .. } => None,
311            Instruction::PushParserInfo { .. } => None,
312            Instruction::RedirectOut { .. } => None,
313            Instruction::RedirectErr { .. } => None,
314            Instruction::CheckErrRedirected { .. } => None,
315            Instruction::OpenFile { .. } => None,
316            Instruction::WriteFile { .. } => None,
317            Instruction::CloseFile { .. } => None,
318            Instruction::Call { src_dst, .. } => Some(src_dst),
319            Instruction::StringAppend { src_dst, .. } => Some(src_dst),
320            Instruction::GlobFrom { src_dst, .. } => Some(src_dst),
321            Instruction::ListPush { src_dst, .. } => Some(src_dst),
322            Instruction::ListSpread { src_dst, .. } => Some(src_dst),
323            Instruction::RecordInsert { src_dst, .. } => Some(src_dst),
324            Instruction::RecordSpread { src_dst, .. } => Some(src_dst),
325            Instruction::Not { src_dst } => Some(src_dst),
326            Instruction::BinaryOp { lhs_dst, .. } => Some(lhs_dst),
327            Instruction::FollowCellPath { src_dst, .. } => Some(src_dst),
328            Instruction::CloneCellPath { dst, .. } => Some(dst),
329            Instruction::UpsertCellPath { src_dst, .. } => Some(src_dst),
330            Instruction::Jump { .. } => None,
331            Instruction::BranchIf { .. } => None,
332            Instruction::BranchIfEmpty { .. } => None,
333            Instruction::Match { .. } => None,
334            Instruction::CheckMatchGuard { .. } => None,
335            Instruction::Iterate { dst, .. } => Some(dst),
336            Instruction::OnError { .. } => None,
337            Instruction::Finally { .. } => None,
338            Instruction::OnErrorInto { .. } => None,
339            Instruction::FinallyInto { .. } => None,
340            Instruction::PopErrorHandler => None,
341            Instruction::PopFinallyRun => None,
342            Instruction::ReturnEarly { .. } => None,
343            Instruction::Return { .. } => None,
344        }
345    }
346
347    /// Returns the branch target index of the instruction if this is a branching instruction.
348    pub fn branch_target(&self) -> Option<usize> {
349        match self {
350            Instruction::Jump { index } => Some(*index),
351            Instruction::BranchIf { cond: _, index } => Some(*index),
352            Instruction::BranchIfEmpty { src: _, index } => Some(*index),
353            Instruction::Match {
354                pattern: _,
355                src: _,
356                index,
357            } => Some(*index),
358
359            Instruction::Iterate {
360                dst: _,
361                stream: _,
362                end_index,
363            } => Some(*end_index),
364            Instruction::OnError { index } => Some(*index),
365            Instruction::OnErrorInto { index, dst: _ } => Some(*index),
366            Instruction::Finally { index } => Some(*index),
367            Instruction::FinallyInto { index, dst: _ } => Some(*index),
368            _ => None,
369        }
370    }
371
372    /// Sets the branch target of the instruction if this is a branching instruction.
373    ///
374    /// Returns `Err(target_index)` if it isn't a branching instruction.
375    pub fn set_branch_target(&mut self, target_index: usize) -> Result<(), usize> {
376        match self {
377            Instruction::Jump { index } => *index = target_index,
378            Instruction::BranchIf { cond: _, index } => *index = target_index,
379            Instruction::BranchIfEmpty { src: _, index } => *index = target_index,
380            Instruction::Match {
381                pattern: _,
382                src: _,
383                index,
384            } => *index = target_index,
385
386            Instruction::Iterate {
387                dst: _,
388                stream: _,
389                end_index,
390            } => *end_index = target_index,
391            Instruction::OnError { index } => *index = target_index,
392            Instruction::OnErrorInto { index, dst: _ } => *index = target_index,
393            Instruction::Finally { index } => *index = target_index,
394            Instruction::FinallyInto { index, dst: _ } => *index = target_index,
395            _ => return Err(target_index),
396        }
397        Ok(())
398    }
399
400    /// Check for an interrupt before certain instructions
401    pub fn check_interrupt(
402        &self,
403        engine_state: &EngineState,
404        span: &Span,
405    ) -> Result<(), ShellError> {
406        match self {
407            Instruction::Jump { .. } | Instruction::Return { .. } => {
408                engine_state.signals().check(span)
409            }
410            _ => Ok(()),
411        }
412    }
413}
414
415// This is to document/enforce the size of `Instruction` in bytes.
416// We should try to avoid increasing the size of `Instruction`,
417// and PRs that do so will have to change the number below so that it's noted in review.
418const _: () = assert!(std::mem::size_of::<Instruction>() <= 24);
419
420/// A literal value that can be embedded in an instruction.
421#[derive(Debug, Clone, Serialize, Deserialize)]
422pub enum Literal {
423    Bool(bool),
424    Int(i64),
425    Float(f64),
426    Filesize(Filesize),
427    Duration(i64),
428    Binary(DataSlice),
429    Block(BlockId),
430    Closure(BlockId),
431    RowCondition(BlockId),
432    Range {
433        start: RegId,
434        step: RegId,
435        end: RegId,
436        inclusion: RangeInclusion,
437    },
438    List {
439        capacity: usize,
440    },
441    Record {
442        capacity: usize,
443    },
444    Filepath {
445        val: DataSlice,
446        no_expand: bool,
447    },
448    Directory {
449        val: DataSlice,
450        no_expand: bool,
451    },
452    GlobPattern {
453        val: DataSlice,
454        no_expand: bool,
455    },
456    String(DataSlice),
457    RawString(DataSlice),
458    CellPath(Box<CellPath>),
459    Date(Box<DateTime<FixedOffset>>),
460    Nothing,
461    /// Represents an empty pipeline input (distinct from `Nothing` which is the `null` value).
462    /// Used by `load_empty` to initialize registers with no input.
463    Empty,
464}
465
466/// A redirection mode for the next call. See [`OutDest`](crate::OutDest).
467///
468/// This is generated by:
469///
470/// 1. Explicit redirection in a [`PipelineElement`](crate::ast::PipelineElement), or
471/// 2. The [`pipe_redirection()`](crate::engine::Command::pipe_redirection) of the command being
472///    piped into.
473///
474/// Not setting it uses the default, determined by [`Stack`](crate::engine::Stack).
475#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
476pub enum RedirectMode {
477    Pipe,
478    PipeSeparate,
479    Value,
480    Null,
481    Inherit,
482    Print,
483    /// Use the given numbered file.
484    File {
485        file_num: u32,
486    },
487    /// Use the redirection mode requested by the caller, for a pre-return call.
488    Caller,
489}
490
491/// Just a hack to allow `Arc<[u8]>` to be serialized and deserialized
492mod serde_arc_u8_array {
493    use serde::{Deserialize, Serialize};
494    use std::sync::Arc;
495
496    pub fn serialize<S>(data: &Arc<[u8]>, ser: S) -> Result<S::Ok, S::Error>
497    where
498        S: serde::Serializer,
499    {
500        data.as_ref().serialize(ser)
501    }
502
503    pub fn deserialize<'de, D>(de: D) -> Result<Arc<[u8]>, D::Error>
504    where
505        D: serde::Deserializer<'de>,
506    {
507        let data: Vec<u8> = Deserialize::deserialize(de)?;
508        Ok(data.into())
509    }
510}