nu_protocol/ir/
mod.rs

1use crate::{
2    BlockId, DeclId, Filesize, RegId, Span, Value, VarId,
3    ast::{CellPath, Expression, Operator, Pattern, RangeInclusion},
4    engine::EngineState,
5};
6use chrono::{DateTime, FixedOffset};
7use serde::{Deserialize, Serialize};
8use std::{fmt, sync::Arc};
9
10mod call;
11mod display;
12
13pub use call::*;
14pub use display::{FmtInstruction, FmtIrBlock};
15
16#[derive(Clone, Serialize, Deserialize)]
17pub struct IrBlock {
18    pub instructions: Vec<Instruction>,
19    pub spans: Vec<Span>,
20    #[serde(with = "serde_arc_u8_array")]
21    pub data: Arc<[u8]>,
22    pub ast: Vec<Option<IrAstRef>>,
23    /// Additional information that can be added to help with debugging
24    pub comments: Vec<Box<str>>,
25    pub register_count: u32,
26    pub file_count: u32,
27}
28
29impl fmt::Debug for IrBlock {
30    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
31        // the ast field is too verbose and doesn't add much
32        f.debug_struct("IrBlock")
33            .field("instructions", &self.instructions)
34            .field("spans", &self.spans)
35            .field("data", &self.data)
36            .field("comments", &self.comments)
37            .field("register_count", &self.register_count)
38            .field("file_count", &self.file_count)
39            .finish_non_exhaustive()
40    }
41}
42
43impl IrBlock {
44    /// Returns a value that can be formatted with [`Display`](std::fmt::Display) to show a detailed
45    /// listing of the instructions contained within this [`IrBlock`].
46    pub fn display<'a>(&'a self, engine_state: &'a EngineState) -> FmtIrBlock<'a> {
47        FmtIrBlock {
48            engine_state,
49            ir_block: self,
50        }
51    }
52}
53
54/// A slice into the `data` array of a block. This is a compact and cache-friendly way to store
55/// string data that a block uses.
56#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
57pub struct DataSlice {
58    pub start: u32,
59    pub len: u32,
60}
61
62impl DataSlice {
63    /// A data slice that contains no data. This slice is always valid.
64    pub const fn empty() -> DataSlice {
65        DataSlice { start: 0, len: 0 }
66    }
67}
68
69impl std::ops::Index<DataSlice> for [u8] {
70    type Output = [u8];
71
72    fn index(&self, index: DataSlice) -> &Self::Output {
73        &self[index.start as usize..(index.start as usize + index.len as usize)]
74    }
75}
76
77/// A possible reference into the abstract syntax tree for an instruction. This is not present for
78/// most instructions and is just added when needed.
79#[derive(Debug, Clone)]
80pub struct IrAstRef(pub Arc<Expression>);
81
82impl Serialize for IrAstRef {
83    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
84    where
85        S: serde::Serializer,
86    {
87        self.0.as_ref().serialize(serializer)
88    }
89}
90
91impl<'de> Deserialize<'de> for IrAstRef {
92    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
93    where
94        D: serde::Deserializer<'de>,
95    {
96        Expression::deserialize(deserializer).map(|expr| IrAstRef(Arc::new(expr)))
97    }
98}
99
100#[derive(Debug, Clone, Serialize, Deserialize)]
101pub enum Instruction {
102    /// Unreachable code path (error)
103    Unreachable,
104    /// Load a literal value into the `dst` register
105    LoadLiteral { dst: RegId, lit: Literal },
106    /// Load a clone of a boxed value into the `dst` register (e.g. from const evaluation)
107    LoadValue { dst: RegId, val: Box<Value> },
108    /// Move a register. Value is taken from `src` (used by this instruction).
109    Move { dst: RegId, src: RegId },
110    /// Copy a register (must be a collected value). Value is still in `src` after this instruction.
111    Clone { dst: RegId, src: RegId },
112    /// Collect a stream in a register to a value
113    Collect { src_dst: RegId },
114    /// Change the span of the contents of a register to the span of this instruction.
115    Span { src_dst: RegId },
116    /// Drop the value/stream in a register, without draining
117    Drop { src: RegId },
118    /// Drain the value/stream in a register and discard (e.g. semicolon).
119    ///
120    /// If passed a stream from an external command, sets $env.LAST_EXIT_CODE to the resulting exit
121    /// code, and invokes any available error handler with Empty, or if not available, returns an
122    /// exit-code-only stream, leaving the block.
123    Drain { src: RegId },
124    /// Drain the value/stream in a register and discard only if this is the last pipeline element.
125    // TODO: see if it's possible to remove this
126    DrainIfEnd { src: RegId },
127    /// Load the value of a variable into the `dst` register
128    LoadVariable { dst: RegId, var_id: VarId },
129    /// Store the value of a variable from the `src` register
130    StoreVariable { var_id: VarId, src: RegId },
131    /// Remove a variable from the stack, freeing up whatever resources were associated with it
132    DropVariable { var_id: VarId },
133    /// Load the value of an environment variable into the `dst` register
134    LoadEnv { dst: RegId, key: DataSlice },
135    /// Load the value of an environment variable into the `dst` register, or `Nothing` if it
136    /// doesn't exist
137    LoadEnvOpt { dst: RegId, key: DataSlice },
138    /// Store the value of an environment variable from the `src` register
139    StoreEnv { key: DataSlice, src: RegId },
140    /// Add a positional arg to the next (internal) call.
141    PushPositional { src: RegId },
142    /// Add a list of args to the next (internal) call (spread/rest).
143    AppendRest { src: RegId },
144    /// Add a named arg with no value to the next (internal) call.
145    PushFlag { name: DataSlice },
146    /// Add a short named arg with no value to the next (internal) call.
147    PushShortFlag { short: DataSlice },
148    /// Add a named arg with a value to the next (internal) call.
149    PushNamed { name: DataSlice, src: RegId },
150    /// Add a short named arg with a value to the next (internal) call.
151    PushShortNamed { short: DataSlice, src: RegId },
152    /// Add parser info to the next (internal) call.
153    PushParserInfo {
154        name: DataSlice,
155        info: Box<Expression>,
156    },
157    /// Set the redirection for stdout for the next call (only).
158    ///
159    /// The register for a file redirection is not consumed.
160    RedirectOut { mode: RedirectMode },
161    /// Set the redirection for stderr for the next call (only).
162    ///
163    /// The register for a file redirection is not consumed.
164    RedirectErr { mode: RedirectMode },
165    /// Throw an error if stderr wasn't redirected in the given stream. `src` is preserved.
166    CheckErrRedirected { src: RegId },
167    /// Open a file for redirection, pushing it onto the file stack.
168    OpenFile {
169        file_num: u32,
170        path: RegId,
171        append: bool,
172    },
173    /// Write data from the register to a file. This is done to finish a file redirection, in case
174    /// an internal command or expression was evaluated rather than an external one.
175    WriteFile { file_num: u32, src: RegId },
176    /// Pop a file used for redirection from the file stack.
177    CloseFile { file_num: u32 },
178    /// Make a call. The input is taken from `src_dst`, and the output is placed in `src_dst`,
179    /// overwriting it. The argument stack is used implicitly and cleared when the call ends.
180    Call { decl_id: DeclId, src_dst: RegId },
181    /// Append a value onto the end of a string. Uses `to_expanded_string(", ", ...)` on the value.
182    /// Used for string interpolation literals. Not the same thing as the `++` operator.
183    StringAppend { src_dst: RegId, val: RegId },
184    /// Convert a string into a glob. Used for glob interpolation and setting glob variables. If the
185    /// value is already a glob, it won't be modified (`no_expand` will have no effect).
186    GlobFrom { src_dst: RegId, no_expand: bool },
187    /// Push a value onto the end of a list. Used to construct list literals.
188    ListPush { src_dst: RegId, item: RegId },
189    /// Spread a value onto the end of a list. Used to construct list literals.
190    ListSpread { src_dst: RegId, items: RegId },
191    /// Insert a key-value pair into a record. Used to construct record literals. Raises an error if
192    /// the key already existed in the record.
193    RecordInsert {
194        src_dst: RegId,
195        key: RegId,
196        val: RegId,
197    },
198    /// Spread a record onto a record. Used to construct record literals. Any existing value for the
199    /// key is overwritten.
200    RecordSpread { src_dst: RegId, items: RegId },
201    /// Negate a boolean.
202    Not { src_dst: RegId },
203    /// Do a binary operation on `lhs_dst` (left) and `rhs` (right) and write the result to
204    /// `lhs_dst`.
205    BinaryOp {
206        lhs_dst: RegId,
207        op: Operator,
208        rhs: RegId,
209    },
210    /// Follow a cell path on the value in `src_dst`, storing the result back to `src_dst`
211    FollowCellPath { src_dst: RegId, path: RegId },
212    /// Clone the value at a cell path in `src`, storing the result to `dst`. The original value
213    /// remains in `src`. Must be a collected value.
214    CloneCellPath { dst: RegId, src: RegId, path: RegId },
215    /// Update/insert a cell path to `new_value` on the value in `src_dst`, storing the modified
216    /// value back to `src_dst`
217    UpsertCellPath {
218        src_dst: RegId,
219        path: RegId,
220        new_value: RegId,
221    },
222    /// Jump to an offset in this block
223    Jump { index: usize },
224    /// Branch to an offset in this block if the value of the `cond` register is a true boolean,
225    /// otherwise continue execution
226    BranchIf { cond: RegId, index: usize },
227    /// Branch to an offset in this block if the value of the `src` register is Empty or Nothing,
228    /// otherwise continue execution. The original value in `src` is preserved.
229    BranchIfEmpty { src: RegId, index: usize },
230    /// Match a pattern on `src`. If the pattern matches, branch to `index` after having set any
231    /// variables captured by the pattern. If the pattern doesn't match, continue execution. The
232    /// original value is preserved in `src` through this instruction.
233    Match {
234        pattern: Box<Pattern>,
235        src: RegId,
236        index: usize,
237    },
238    /// Check that a match guard is a boolean, throwing
239    /// [`MatchGuardNotBool`](crate::ShellError::MatchGuardNotBool) if it isn't. Preserves `src`.
240    CheckMatchGuard { src: RegId },
241    /// Iterate on register `stream`, putting the next value in `dst` if present, or jumping to
242    /// `end_index` if the iterator is finished
243    Iterate {
244        dst: RegId,
245        stream: RegId,
246        end_index: usize,
247    },
248    /// Push an error handler, without capturing the error value
249    OnError { index: usize },
250    /// Push an error handler, capturing the error value into `dst`. If the error handler is not
251    /// called, the register should be freed manually.
252    OnErrorInto { index: usize, dst: RegId },
253    /// Pop an error handler. This is not necessary when control flow is directed to the error
254    /// handler due to an error.
255    PopErrorHandler,
256    /// Return early from the block, raising a `ShellError::Return` instead.
257    ///
258    /// Collecting the value is unavoidable.
259    ReturnEarly { src: RegId },
260    /// Return from the block with the value in the register
261    Return { src: RegId },
262}
263
264impl Instruction {
265    /// Returns a value that can be formatted with [`Display`](std::fmt::Display) to show a detailed
266    /// listing of the instruction.
267    pub fn display<'a>(
268        &'a self,
269        engine_state: &'a EngineState,
270        data: &'a [u8],
271    ) -> FmtInstruction<'a> {
272        FmtInstruction {
273            engine_state,
274            instruction: self,
275            data,
276        }
277    }
278
279    /// Get the output register, for instructions that produce some kind of immediate result.
280    pub fn output_register(&self) -> Option<RegId> {
281        match *self {
282            Instruction::Unreachable => None,
283            Instruction::LoadLiteral { dst, .. } => Some(dst),
284            Instruction::LoadValue { dst, .. } => Some(dst),
285            Instruction::Move { dst, .. } => Some(dst),
286            Instruction::Clone { dst, .. } => Some(dst),
287            Instruction::Collect { src_dst } => Some(src_dst),
288            Instruction::Span { src_dst } => Some(src_dst),
289            Instruction::Drop { .. } => None,
290            Instruction::Drain { .. } => None,
291            Instruction::DrainIfEnd { .. } => None,
292            Instruction::LoadVariable { dst, .. } => Some(dst),
293            Instruction::StoreVariable { .. } => None,
294            Instruction::DropVariable { .. } => None,
295            Instruction::LoadEnv { dst, .. } => Some(dst),
296            Instruction::LoadEnvOpt { dst, .. } => Some(dst),
297            Instruction::StoreEnv { .. } => None,
298            Instruction::PushPositional { .. } => None,
299            Instruction::AppendRest { .. } => None,
300            Instruction::PushFlag { .. } => None,
301            Instruction::PushShortFlag { .. } => None,
302            Instruction::PushNamed { .. } => None,
303            Instruction::PushShortNamed { .. } => None,
304            Instruction::PushParserInfo { .. } => None,
305            Instruction::RedirectOut { .. } => None,
306            Instruction::RedirectErr { .. } => None,
307            Instruction::CheckErrRedirected { .. } => None,
308            Instruction::OpenFile { .. } => None,
309            Instruction::WriteFile { .. } => None,
310            Instruction::CloseFile { .. } => None,
311            Instruction::Call { src_dst, .. } => Some(src_dst),
312            Instruction::StringAppend { src_dst, .. } => Some(src_dst),
313            Instruction::GlobFrom { src_dst, .. } => Some(src_dst),
314            Instruction::ListPush { src_dst, .. } => Some(src_dst),
315            Instruction::ListSpread { src_dst, .. } => Some(src_dst),
316            Instruction::RecordInsert { src_dst, .. } => Some(src_dst),
317            Instruction::RecordSpread { src_dst, .. } => Some(src_dst),
318            Instruction::Not { src_dst } => Some(src_dst),
319            Instruction::BinaryOp { lhs_dst, .. } => Some(lhs_dst),
320            Instruction::FollowCellPath { src_dst, .. } => Some(src_dst),
321            Instruction::CloneCellPath { dst, .. } => Some(dst),
322            Instruction::UpsertCellPath { src_dst, .. } => Some(src_dst),
323            Instruction::Jump { .. } => None,
324            Instruction::BranchIf { .. } => None,
325            Instruction::BranchIfEmpty { .. } => None,
326            Instruction::Match { .. } => None,
327            Instruction::CheckMatchGuard { .. } => None,
328            Instruction::Iterate { dst, .. } => Some(dst),
329            Instruction::OnError { .. } => None,
330            Instruction::OnErrorInto { .. } => None,
331            Instruction::PopErrorHandler => None,
332            Instruction::ReturnEarly { .. } => None,
333            Instruction::Return { .. } => None,
334        }
335    }
336
337    /// Returns the branch target index of the instruction if this is a branching instruction.
338    pub fn branch_target(&self) -> Option<usize> {
339        match self {
340            Instruction::Jump { index } => Some(*index),
341            Instruction::BranchIf { cond: _, index } => Some(*index),
342            Instruction::BranchIfEmpty { src: _, index } => Some(*index),
343            Instruction::Match {
344                pattern: _,
345                src: _,
346                index,
347            } => Some(*index),
348
349            Instruction::Iterate {
350                dst: _,
351                stream: _,
352                end_index,
353            } => Some(*end_index),
354            Instruction::OnError { index } => Some(*index),
355            Instruction::OnErrorInto { index, dst: _ } => Some(*index),
356            _ => None,
357        }
358    }
359
360    /// Sets the branch target of the instruction if this is a branching instruction.
361    ///
362    /// Returns `Err(target_index)` if it isn't a branching instruction.
363    pub fn set_branch_target(&mut self, target_index: usize) -> Result<(), usize> {
364        match self {
365            Instruction::Jump { index } => *index = target_index,
366            Instruction::BranchIf { cond: _, index } => *index = target_index,
367            Instruction::BranchIfEmpty { src: _, index } => *index = target_index,
368            Instruction::Match {
369                pattern: _,
370                src: _,
371                index,
372            } => *index = target_index,
373
374            Instruction::Iterate {
375                dst: _,
376                stream: _,
377                end_index,
378            } => *end_index = target_index,
379            Instruction::OnError { index } => *index = target_index,
380            Instruction::OnErrorInto { index, dst: _ } => *index = target_index,
381            _ => return Err(target_index),
382        }
383        Ok(())
384    }
385}
386
387// This is to document/enforce the size of `Instruction` in bytes.
388// We should try to avoid increasing the size of `Instruction`,
389// and PRs that do so will have to change the number below so that it's noted in review.
390const _: () = assert!(std::mem::size_of::<Instruction>() <= 24);
391
392/// A literal value that can be embedded in an instruction.
393#[derive(Debug, Clone, Serialize, Deserialize)]
394pub enum Literal {
395    Bool(bool),
396    Int(i64),
397    Float(f64),
398    Filesize(Filesize),
399    Duration(i64),
400    Binary(DataSlice),
401    Block(BlockId),
402    Closure(BlockId),
403    RowCondition(BlockId),
404    Range {
405        start: RegId,
406        step: RegId,
407        end: RegId,
408        inclusion: RangeInclusion,
409    },
410    List {
411        capacity: usize,
412    },
413    Record {
414        capacity: usize,
415    },
416    Filepath {
417        val: DataSlice,
418        no_expand: bool,
419    },
420    Directory {
421        val: DataSlice,
422        no_expand: bool,
423    },
424    GlobPattern {
425        val: DataSlice,
426        no_expand: bool,
427    },
428    String(DataSlice),
429    RawString(DataSlice),
430    CellPath(Box<CellPath>),
431    Date(Box<DateTime<FixedOffset>>),
432    Nothing,
433}
434
435/// A redirection mode for the next call. See [`OutDest`](crate::OutDest).
436///
437/// This is generated by:
438///
439/// 1. Explicit redirection in a [`PipelineElement`](crate::ast::PipelineElement), or
440/// 2. The [`pipe_redirection()`](crate::engine::Command::pipe_redirection) of the command being
441///    piped into.
442///
443/// Not setting it uses the default, determined by [`Stack`](crate::engine::Stack).
444#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
445pub enum RedirectMode {
446    Pipe,
447    PipeSeparate,
448    Value,
449    Null,
450    Inherit,
451    Print,
452    /// Use the given numbered file.
453    File {
454        file_num: u32,
455    },
456    /// Use the redirection mode requested by the caller, for a pre-return call.
457    Caller,
458}
459
460/// Just a hack to allow `Arc<[u8]>` to be serialized and deserialized
461mod serde_arc_u8_array {
462    use serde::{Deserialize, Serialize};
463    use std::sync::Arc;
464
465    pub fn serialize<S>(data: &Arc<[u8]>, ser: S) -> Result<S::Ok, S::Error>
466    where
467        S: serde::Serializer,
468    {
469        data.as_ref().serialize(ser)
470    }
471
472    pub fn deserialize<'de, D>(de: D) -> Result<Arc<[u8]>, D::Error>
473    where
474        D: serde::Deserializer<'de>,
475    {
476        let data: Vec<u8> = Deserialize::deserialize(de)?;
477        Ok(data.into())
478    }
479}