Skip to main content

nu_protocol/ir/
mod.rs

1use crate::{
2    BlockId, DeclId, Filesize, RegId, ShellError, Span, Value, VarId,
3    ast::{CellPath, Expression, Operator, Pattern, RangeInclusion},
4    engine::EngineState,
5};
6use chrono::{DateTime, FixedOffset};
7use serde::{Deserialize, Serialize};
8use std::{fmt, sync::Arc};
9
10mod call;
11mod display;
12
13pub use call::*;
14pub use display::{FmtInstruction, FmtIrBlock};
15
/// A block of IR: a list of [`Instruction`]s plus the side tables that accompany them.
#[derive(Clone, Serialize, Deserialize)]
pub struct IrBlock {
    /// The instructions that make up this block.
    pub instructions: Vec<Instruction>,
    /// Source spans for the block.
    // NOTE(review): presumably one span per instruction, at the same index — confirm.
    pub spans: Vec<Span>,
    /// Raw bytes that [`DataSlice`] values index into. Serialized through the
    /// `serde_arc_u8_array` helper module because it is stored as an `Arc<[u8]>`.
    #[serde(with = "serde_arc_u8_array")]
    pub data: Arc<[u8]>,
    /// Optional references into the abstract syntax tree (see [`IrAstRef`]).
    // NOTE(review): presumably indexed in parallel with `instructions` — confirm.
    pub ast: Vec<Option<IrAstRef>>,
    /// Additional information that can be added to help with debugging
    pub comments: Vec<Box<str>>,
    // NOTE(review): presumably the number of registers this block needs — confirm.
    pub register_count: u32,
    // NOTE(review): presumably the number of redirection file slots this block needs — confirm.
    pub file_count: u32,
}
28
29impl fmt::Debug for IrBlock {
30    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
31        // the ast field is too verbose and doesn't add much
32        f.debug_struct("IrBlock")
33            .field("instructions", &self.instructions)
34            .field("spans", &self.spans)
35            .field("data", &self.data)
36            .field("comments", &self.comments)
37            .field("register_count", &self.register_count)
38            .field("file_count", &self.file_count)
39            .finish_non_exhaustive()
40    }
41}
42
43impl IrBlock {
44    /// Returns a value that can be formatted with [`Display`](std::fmt::Display) to show a detailed
45    /// listing of the instructions contained within this [`IrBlock`].
46    pub fn display<'a>(&'a self, engine_state: &'a EngineState) -> FmtIrBlock<'a> {
47        FmtIrBlock {
48            engine_state,
49            ir_block: self,
50        }
51    }
52}
53
/// A slice into the `data` array of a block. This is a compact and cache-friendly way to store
/// string data that a block uses.
///
/// Indexing a `[u8]` with a `DataSlice` yields the bytes `start..start + len`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
pub struct DataSlice {
    /// Offset of the first byte of the slice within the data array.
    pub start: u32,
    /// Number of bytes in the slice.
    pub len: u32,
}
61
62impl DataSlice {
63    /// A data slice that contains no data. This slice is always valid.
64    pub const fn empty() -> DataSlice {
65        DataSlice { start: 0, len: 0 }
66    }
67}
68
69impl std::ops::Index<DataSlice> for [u8] {
70    type Output = [u8];
71
72    fn index(&self, index: DataSlice) -> &Self::Output {
73        &self[index.start as usize..(index.start as usize + index.len as usize)]
74    }
75}
76
/// A possible reference into the abstract syntax tree for an instruction. This is not present for
/// most instructions and is just added when needed.
///
/// The expression is held behind an [`Arc`], so cloning an `IrAstRef` shares the expression
/// rather than copying it.
#[derive(Debug, Clone)]
pub struct IrAstRef(pub Arc<Expression>);
81
82impl Serialize for IrAstRef {
83    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
84    where
85        S: serde::Serializer,
86    {
87        self.0.as_ref().serialize(serializer)
88    }
89}
90
91impl<'de> Deserialize<'de> for IrAstRef {
92    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
93    where
94        D: serde::Deserializer<'de>,
95    {
96        Expression::deserialize(deserializer).map(|expr| IrAstRef(Arc::new(expr)))
97    }
98}
99
/// A single IR instruction.
///
/// Field naming follows the variant docs below: `src` registers are read, `dst` registers are
/// written, and `src_dst` / `lhs_dst` registers are read and then overwritten with the result.
/// Larger payloads (`Value`, `Expression`, `Pattern`) are boxed; a compile-time assertion in this
/// module keeps the size of this enum at or below 24 bytes.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum Instruction {
    /// Unreachable code path (error)
    Unreachable,
    /// Load a literal value into the `dst` register
    LoadLiteral { dst: RegId, lit: Literal },
    /// Load a clone of a boxed value into the `dst` register (e.g. from const evaluation)
    LoadValue { dst: RegId, val: Box<Value> },
    /// Move a register. Value is taken from `src` (used by this instruction).
    Move { dst: RegId, src: RegId },
    /// Copy a register (must be a collected value). Value is still in `src` after this instruction.
    Clone { dst: RegId, src: RegId },
    /// Collect a stream in a register to a value.
    /// Because it collects to a value, nushell will ignore the errors in the stream.
    /// It's important when the stream is from an external command
    Collect { src_dst: RegId },
    /// Collect a stream in a register to a value.
    /// But it's different from `Collect` in that if there is an error in the stream, it will be
    /// returned as an error instead of being ignored.
    TryCollect { src_dst: RegId },
    /// Change the span of the contents of a register to the span of this instruction.
    Span { src_dst: RegId },
    /// Drop the value/stream in a register, without draining
    Drop { src: RegId },
    /// Drain the value/stream in a register and discard (e.g. semicolon).
    ///
    /// If passed a stream from an external command, sets $env.LAST_EXIT_CODE to the resulting exit
    /// code, and invokes any available error handler with Empty, or if not available, returns an
    /// exit-code-only stream, leaving the block.
    Drain { src: RegId },
    /// Drain the value/stream in a register and discard only if this is the last pipeline element.
    // TODO: see if it's possible to remove this
    DrainIfEnd { src: RegId },
    /// Load the value of a variable into the `dst` register
    LoadVariable { dst: RegId, var_id: VarId },
    /// Store the value of a variable from the `src` register
    StoreVariable { var_id: VarId, src: RegId },
    /// Remove a variable from the stack, freeing up whatever resources were associated with it
    DropVariable { var_id: VarId },
    /// Load the value of an environment variable into the `dst` register
    LoadEnv { dst: RegId, key: DataSlice },
    /// Load the value of an environment variable into the `dst` register, or `Nothing` if it
    /// doesn't exist
    LoadEnvOpt { dst: RegId, key: DataSlice },
    /// Store the value of an environment variable from the `src` register
    StoreEnv { key: DataSlice, src: RegId },
    /// Add a positional arg to the next (internal) call.
    PushPositional { src: RegId },
    /// Add a list of args to the next (internal) call (spread/rest).
    AppendRest { src: RegId },
    /// Add a named arg with no value to the next (internal) call.
    PushFlag { name: DataSlice },
    /// Add a short named arg with no value to the next (internal) call.
    PushShortFlag { short: DataSlice },
    /// Add a named arg with a value to the next (internal) call.
    PushNamed { name: DataSlice, src: RegId },
    /// Add a short named arg with a value to the next (internal) call.
    PushShortNamed { short: DataSlice, src: RegId },
    /// Add parser info to the next (internal) call.
    PushParserInfo {
        name: DataSlice,
        info: Box<Expression>,
    },
    /// Set the redirection for stdout for the next call (only).
    ///
    /// The register for a file redirection is not consumed.
    RedirectOut { mode: RedirectMode },
    /// Set the redirection for stderr for the next call (only).
    ///
    /// The register for a file redirection is not consumed.
    RedirectErr { mode: RedirectMode },
    /// Throw an error if stderr wasn't redirected in the given stream. `src` is preserved.
    CheckErrRedirected { src: RegId },
    /// Open a file for redirection, pushing it onto the file stack.
    OpenFile {
        file_num: u32,
        path: RegId,
        append: bool,
    },
    /// Write data from the register to a file. This is done to finish a file redirection, in case
    /// an internal command or expression was evaluated rather than an external one.
    WriteFile { file_num: u32, src: RegId },
    /// Pop a file used for redirection from the file stack.
    CloseFile { file_num: u32 },
    /// Make a call. The input is taken from `src_dst`, and the output is placed in `src_dst`,
    /// overwriting it. The argument stack is used implicitly and cleared when the call ends.
    Call { decl_id: DeclId, src_dst: RegId },
    /// Append a value onto the end of a string. Uses `to_expanded_string(", ", ...)` on the value.
    /// Used for string interpolation literals. Not the same thing as the `++` operator.
    StringAppend { src_dst: RegId, val: RegId },
    /// Convert a string into a glob. Used for glob interpolation and setting glob variables. If the
    /// value is already a glob, it won't be modified (`no_expand` will have no effect).
    GlobFrom { src_dst: RegId, no_expand: bool },
    /// Push a value onto the end of a list. Used to construct list literals.
    ListPush { src_dst: RegId, item: RegId },
    /// Spread a value onto the end of a list. Used to construct list literals.
    ListSpread { src_dst: RegId, items: RegId },
    /// Insert a key-value pair into a record. Used to construct record literals. Raises an error if
    /// the key already existed in the record.
    RecordInsert {
        src_dst: RegId,
        key: RegId,
        val: RegId,
    },
    /// Spread a record onto a record. Used to construct record literals. Any existing value for the
    /// key is overwritten.
    RecordSpread { src_dst: RegId, items: RegId },
    /// Negate a boolean.
    Not { src_dst: RegId },
    /// Do a binary operation on `lhs_dst` (left) and `rhs` (right) and write the result to
    /// `lhs_dst`.
    BinaryOp {
        lhs_dst: RegId,
        op: Operator,
        rhs: RegId,
    },
    /// Follow a cell path on the value in `src_dst`, storing the result back to `src_dst`
    FollowCellPath { src_dst: RegId, path: RegId },
    /// Clone the value at a cell path in `src`, storing the result to `dst`. The original value
    /// remains in `src`. Must be a collected value.
    CloneCellPath { dst: RegId, src: RegId, path: RegId },
    /// Update/insert a cell path to `new_value` on the value in `src_dst`, storing the modified
    /// value back to `src_dst`
    UpsertCellPath {
        src_dst: RegId,
        path: RegId,
        new_value: RegId,
    },
    /// Jump to an offset in this block
    Jump { index: usize },
    /// Branch to an offset in this block if the value of the `cond` register is a true boolean,
    /// otherwise continue execution
    BranchIf { cond: RegId, index: usize },
    /// Branch to an offset in this block if the value of the `src` register is Empty or Nothing,
    /// otherwise continue execution. The original value in `src` is preserved.
    BranchIfEmpty { src: RegId, index: usize },
    /// Match a pattern on `src`. If the pattern matches, branch to `index` after having set any
    /// variables captured by the pattern. If the pattern doesn't match, continue execution. The
    /// original value is preserved in `src` through this instruction.
    Match {
        pattern: Box<Pattern>,
        src: RegId,
        index: usize,
    },
    /// Check that a match guard is a boolean, throwing
    /// [`MatchGuardNotBool`](crate::ShellError::MatchGuardNotBool) if it isn't. Preserves `src`.
    CheckMatchGuard { src: RegId },
    /// Iterate on register `stream`, putting the next value in `dst` if present, or jumping to
    /// `end_index` if the iterator is finished
    Iterate {
        dst: RegId,
        stream: RegId,
        end_index: usize,
    },
    /// Push an error handler, without capturing the error value
    OnError { index: usize },
    /// Push an error handler, capturing the error value into `dst`. If the error handler is not
    /// called, the register should be freed manually.
    OnErrorInto { index: usize, dst: RegId },
    /// Push a finally handler, without capturing the error value
    Finally { index: usize },
    /// Push a finally handler, capturing the error value into `dst`. If the finally handler is not
    /// called, the register should be freed manually.
    FinallyInto { index: usize, dst: RegId },
    /// Pop an error handler. This is not necessary when control flow is directed to the error
    /// handler due to an error.
    PopErrorHandler,
    /// Pop a finally handler.
    PopFinallyRun,
    /// Return early from the block, raising a `ShellError::Return` instead.
    ///
    /// Collecting the value is unavoidable.
    ReturnEarly { src: RegId },
    /// Return from the block with the value in the register
    Return { src: RegId },
}
276
277impl Instruction {
278    /// Returns a value that can be formatted with [`Display`](std::fmt::Display) to show a detailed
279    /// listing of the instruction.
280    pub fn display<'a>(
281        &'a self,
282        engine_state: &'a EngineState,
283        data: &'a [u8],
284    ) -> FmtInstruction<'a> {
285        FmtInstruction {
286            engine_state,
287            instruction: self,
288            data,
289        }
290    }
291
292    /// Get the output register, for instructions that produce some kind of immediate result.
293    pub fn output_register(&self) -> Option<RegId> {
294        match *self {
295            Instruction::Unreachable => None,
296            Instruction::LoadLiteral { dst, .. } => Some(dst),
297            Instruction::LoadValue { dst, .. } => Some(dst),
298            Instruction::Move { dst, .. } => Some(dst),
299            Instruction::Clone { dst, .. } => Some(dst),
300            Instruction::Collect { src_dst } => Some(src_dst),
301            Instruction::TryCollect { src_dst } => Some(src_dst),
302            Instruction::Span { src_dst } => Some(src_dst),
303            Instruction::Drop { .. } => None,
304            Instruction::Drain { .. } => None,
305            Instruction::DrainIfEnd { .. } => None,
306            Instruction::LoadVariable { dst, .. } => Some(dst),
307            Instruction::StoreVariable { .. } => None,
308            Instruction::DropVariable { .. } => None,
309            Instruction::LoadEnv { dst, .. } => Some(dst),
310            Instruction::LoadEnvOpt { dst, .. } => Some(dst),
311            Instruction::StoreEnv { .. } => None,
312            Instruction::PushPositional { .. } => None,
313            Instruction::AppendRest { .. } => None,
314            Instruction::PushFlag { .. } => None,
315            Instruction::PushShortFlag { .. } => None,
316            Instruction::PushNamed { .. } => None,
317            Instruction::PushShortNamed { .. } => None,
318            Instruction::PushParserInfo { .. } => None,
319            Instruction::RedirectOut { .. } => None,
320            Instruction::RedirectErr { .. } => None,
321            Instruction::CheckErrRedirected { .. } => None,
322            Instruction::OpenFile { .. } => None,
323            Instruction::WriteFile { .. } => None,
324            Instruction::CloseFile { .. } => None,
325            Instruction::Call { src_dst, .. } => Some(src_dst),
326            Instruction::StringAppend { src_dst, .. } => Some(src_dst),
327            Instruction::GlobFrom { src_dst, .. } => Some(src_dst),
328            Instruction::ListPush { src_dst, .. } => Some(src_dst),
329            Instruction::ListSpread { src_dst, .. } => Some(src_dst),
330            Instruction::RecordInsert { src_dst, .. } => Some(src_dst),
331            Instruction::RecordSpread { src_dst, .. } => Some(src_dst),
332            Instruction::Not { src_dst } => Some(src_dst),
333            Instruction::BinaryOp { lhs_dst, .. } => Some(lhs_dst),
334            Instruction::FollowCellPath { src_dst, .. } => Some(src_dst),
335            Instruction::CloneCellPath { dst, .. } => Some(dst),
336            Instruction::UpsertCellPath { src_dst, .. } => Some(src_dst),
337            Instruction::Jump { .. } => None,
338            Instruction::BranchIf { .. } => None,
339            Instruction::BranchIfEmpty { .. } => None,
340            Instruction::Match { .. } => None,
341            Instruction::CheckMatchGuard { .. } => None,
342            Instruction::Iterate { dst, .. } => Some(dst),
343            Instruction::OnError { .. } => None,
344            Instruction::Finally { .. } => None,
345            Instruction::OnErrorInto { .. } => None,
346            Instruction::FinallyInto { .. } => None,
347            Instruction::PopErrorHandler => None,
348            Instruction::PopFinallyRun => None,
349            Instruction::ReturnEarly { .. } => None,
350            Instruction::Return { .. } => None,
351        }
352    }
353
354    /// Returns the branch target index of the instruction if this is a branching instruction.
355    pub fn branch_target(&self) -> Option<usize> {
356        match self {
357            Instruction::Jump { index } => Some(*index),
358            Instruction::BranchIf { cond: _, index } => Some(*index),
359            Instruction::BranchIfEmpty { src: _, index } => Some(*index),
360            Instruction::Match {
361                pattern: _,
362                src: _,
363                index,
364            } => Some(*index),
365
366            Instruction::Iterate {
367                dst: _,
368                stream: _,
369                end_index,
370            } => Some(*end_index),
371            Instruction::OnError { index } => Some(*index),
372            Instruction::OnErrorInto { index, dst: _ } => Some(*index),
373            Instruction::Finally { index } => Some(*index),
374            Instruction::FinallyInto { index, dst: _ } => Some(*index),
375            _ => None,
376        }
377    }
378
379    /// Sets the branch target of the instruction if this is a branching instruction.
380    ///
381    /// Returns `Err(target_index)` if it isn't a branching instruction.
382    pub fn set_branch_target(&mut self, target_index: usize) -> Result<(), usize> {
383        match self {
384            Instruction::Jump { index } => *index = target_index,
385            Instruction::BranchIf { cond: _, index } => *index = target_index,
386            Instruction::BranchIfEmpty { src: _, index } => *index = target_index,
387            Instruction::Match {
388                pattern: _,
389                src: _,
390                index,
391            } => *index = target_index,
392
393            Instruction::Iterate {
394                dst: _,
395                stream: _,
396                end_index,
397            } => *end_index = target_index,
398            Instruction::OnError { index } => *index = target_index,
399            Instruction::OnErrorInto { index, dst: _ } => *index = target_index,
400            Instruction::Finally { index } => *index = target_index,
401            Instruction::FinallyInto { index, dst: _ } => *index = target_index,
402            _ => return Err(target_index),
403        }
404        Ok(())
405    }
406
407    /// Check for an interrupt before certain instructions
408    pub fn check_interrupt(
409        &self,
410        engine_state: &EngineState,
411        span: &Span,
412    ) -> Result<(), ShellError> {
413        match self {
414            Instruction::Jump { .. } | Instruction::Return { .. } => {
415                engine_state.signals().check(span)
416            }
417            _ => Ok(()),
418        }
419    }
420}
421
// This is to document/enforce the size of `Instruction` in bytes.
// We should try to avoid increasing the size of `Instruction`,
// and PRs that do so will have to change the number below so that it's noted in review.
// The assertion is evaluated in a const context, so exceeding the limit is a compile error.
const _: () = assert!(std::mem::size_of::<Instruction>() <= 24);
426
/// A literal value that can be embedded in an instruction.
///
/// Variants that carry a [`DataSlice`] refer to bytes stored in the `data` array of the block
/// rather than holding the bytes inline.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum Literal {
    Bool(bool),
    Int(i64),
    Float(f64),
    Filesize(Filesize),
    // NOTE(review): the unit of this `i64` is not visible here — confirm before relying on it.
    Duration(i64),
    Binary(DataSlice),
    Block(BlockId),
    Closure(BlockId),
    RowCondition(BlockId),
    // NOTE(review): presumably the range bounds are read from these registers when the literal
    // is loaded — confirm.
    Range {
        start: RegId,
        step: RegId,
        end: RegId,
        inclusion: RangeInclusion,
    },
    // NOTE(review): presumably a new, empty list with room reserved for `capacity` items —
    // confirm.
    List {
        capacity: usize,
    },
    // NOTE(review): presumably a new, empty record with room reserved for `capacity` entries —
    // confirm.
    Record {
        capacity: usize,
    },
    Filepath {
        val: DataSlice,
        no_expand: bool,
    },
    Directory {
        val: DataSlice,
        no_expand: bool,
    },
    GlobPattern {
        val: DataSlice,
        no_expand: bool,
    },
    String(DataSlice),
    RawString(DataSlice),
    CellPath(Box<CellPath>),
    Date(Box<DateTime<FixedOffset>>),
    Nothing,
    /// Represents an empty pipeline input (distinct from `Nothing` which is the `null` value).
    /// Used by `load_empty` to initialize registers with no input.
    Empty,
}
472
/// A redirection mode for the next call. See [`OutDest`](crate::OutDest).
///
/// This is generated by:
///
/// 1. Explicit redirection in a [`PipelineElement`](crate::ast::PipelineElement), or
/// 2. The [`pipe_redirection()`](crate::engine::Command::pipe_redirection) of the command being
///    piped into.
///
/// Not setting it uses the default, determined by [`Stack`](crate::engine::Stack).
// NOTE(review): the variants other than `File` and `Caller` presumably mirror the like-named
// `OutDest` variants — confirm.
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
pub enum RedirectMode {
    Pipe,
    PipeSeparate,
    Value,
    Null,
    Inherit,
    Print,
    /// Use the given numbered file.
    File {
        file_num: u32,
    },
    /// Use the redirection mode requested by the caller, for a pre-return call.
    Caller,
}
497
498/// Just a hack to allow `Arc<[u8]>` to be serialized and deserialized
499mod serde_arc_u8_array {
500    use serde::{Deserialize, Serialize};
501    use std::sync::Arc;
502
503    pub fn serialize<S>(data: &Arc<[u8]>, ser: S) -> Result<S::Ok, S::Error>
504    where
505        S: serde::Serializer,
506    {
507        data.as_ref().serialize(ser)
508    }
509
510    pub fn deserialize<'de, D>(de: D) -> Result<Arc<[u8]>, D::Error>
511    where
512        D: serde::Deserializer<'de>,
513    {
514        let data: Vec<u8> = Deserialize::deserialize(de)?;
515        Ok(data.into())
516    }
517}