Skip to main content

wasm_opcode_table/
instructions.rs

1//! Schema for [`instructions.toml`](../instructions.toml).
2
3use serde::{Deserialize, Deserializer};
4
5/// Root document: `[[instructions]]` array-of-tables.
6#[derive(Debug, Clone, PartialEq, Eq, Deserialize)]
7pub struct InstructionsTable {
8    pub instructions: Vec<Instruction>,
9}
10
11/// A single WebAssembly instruction / opcode entry.
12#[derive(Debug, Clone, PartialEq, Eq, Deserialize)]
13pub struct Instruction {
14    pub name: String,
15    #[serde(default)]
16    pub variant: Option<String>,
17    pub opcode: Opcode,
18    pub category: String,
19    #[serde(default)]
20    pub immediates: Option<Vec<Immediate>>,
21    #[serde(default, rename = "stack-type")]
22    pub stack_type: Option<StackType>,
23    #[serde(default)]
24    pub feature: Option<String>,
25    #[serde(default)]
26    pub since: Option<String>,
27}
28
/// Opcode as written in TOML: an integer (`0x6A`) or a two-element array (`[0xFC, 17]`).
///
/// The derived ordering sorts every [`Opcode::Single`] before every [`Opcode::Multi`],
/// then compares the payloads left to right. `Hash` is derived as well so an opcode can
/// be used directly as a `HashMap` / `HashSet` key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Opcode {
    /// `opcode = 0xNN` — a single-byte opcode.
    Single(u8),
    /// `opcode = [prefix, index]` — prefix byte and opcode index.
    Multi(u8, u32),
}
37
38impl<'de> Deserialize<'de> for Opcode {
39    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
40    where
41        D: Deserializer<'de>,
42    {
43        serde_helpers::deserialize_opcode(deserializer)
44    }
45}
46
47/// Immediate operand in the instruction encoding.
48#[derive(Debug, Clone, PartialEq, Eq, Deserialize)]
49pub struct Immediate {
50    #[serde(rename = "type")]
51    pub ty: String,
52    #[serde(default)]
53    pub name: Option<String>,
54    #[serde(default, rename = "binary-order")]
55    pub binary_order: Option<u64>,
56}
57
58/// Stack signature (`stack-type.from` / `stack-type.to`).
59#[derive(Debug, Clone, PartialEq, Eq, Deserialize)]
60pub struct StackType {
61    pub from: Vec<StackEntry>,
62    pub to: Vec<StackEntry>,
63}
64
65/// Symbolic or concrete type expression in a stack slot descriptor.
66#[derive(Debug, Clone, PartialEq, Eq, Deserialize)]
67pub struct TypeExpr(pub String);
68
69/// Control construct pushed on the control stack (`block` / `loop` / `if`).
70#[derive(Debug, Clone, PartialEq, Eq, Deserialize)]
71pub struct ControlFrame {
72    pub control: ControlKind,
73    pub start: TypeExpr,
74    pub end: TypeExpr,
75    pub label: LabelTarget,
76}
77
78/// WebAssembly control instruction kind.
79#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize)]
80#[serde(rename_all = "lowercase")]
81pub enum ControlKind {
82    Block,
83    Loop,
84    If,
85}
86
87/// Branch target for a control frame: break (`end`) or continue (`start`).
88#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize)]
89#[serde(rename_all = "lowercase")]
90pub enum LabelTarget {
91    /// Branch exits the construct (break) — `label = "end"` in TOML.
92    End,
93    /// Branch targets the head of a loop (continue) — `label = "start"` in TOML.
94    Start,
95}
96
97/// One stack slot descriptor in `stack-type.from` / `stack-type.to`.
98#[derive(Debug, Clone, PartialEq, Eq)]
99pub enum StackEntry {
100    Type(TypeExpr),
101    TypeOf(TypeExpr),
102    TypesOf(TypeExpr),
103    Unreachable,
104    Control(ControlFrame),
105}
106
107impl<'de> Deserialize<'de> for StackEntry {
108    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
109    where
110        D: Deserializer<'de>,
111    {
112        serde_helpers::deserialize_stack_entry(deserializer)
113    }
114}
115
116mod serde_helpers {
117    use super::{ControlFrame, Opcode, StackEntry, TypeExpr};
118    use serde::de;
119    use serde::{Deserialize, Deserializer};
120
121    pub(super) fn deserialize_opcode<'de, D>(deserializer: D) -> Result<Opcode, D::Error>
122    where
123        D: Deserializer<'de>,
124    {
125        OpcodeRaw::deserialize(deserializer).and_then(|raw| {
126            raw.try_into()
127                .map_err(|e: OpcodeTomlError| de::Error::custom(e))
128        })
129    }
130
131    #[derive(Deserialize)]
132    #[serde(untagged)]
133    enum OpcodeRaw {
134        Single(u64),
135        Multi(Vec<u64>),
136    }
137
138    #[derive(Debug, Clone, PartialEq, Eq)]
139    enum OpcodeTomlError {
140        SingleOutOfRange(u64),
141        MultiWrongLength(usize),
142        MultiOutOfRange { prefix: u64, index: u64 },
143    }
144
145    impl std::fmt::Display for OpcodeTomlError {
146        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
147            match self {
148                Self::SingleOutOfRange(v) => write!(f, "single opcode {v} does not fit in u8"),
149                Self::MultiWrongLength(len) => {
150                    write!(f, "multi opcode must have exactly 2 elements, got {len}")
151                }
152                Self::MultiOutOfRange { prefix, index } => write!(
153                    f,
154                    "multi opcode [{prefix}, {index}] does not fit in (u8, u32)"
155                ),
156            }
157        }
158    }
159
160    impl TryFrom<OpcodeRaw> for Opcode {
161        type Error = OpcodeTomlError;
162
163        fn try_from(raw: OpcodeRaw) -> Result<Self, Self::Error> {
164            match raw {
165                OpcodeRaw::Single(value) => {
166                    let byte = u8::try_from(value).map_err(|_| OpcodeTomlError::SingleOutOfRange(value))?;
167                    Ok(Self::Single(byte))
168                }
169                OpcodeRaw::Multi(values) => {
170                    let [prefix, index] = values
171                        .try_into()
172                        .map_err(|v: Vec<u64>| OpcodeTomlError::MultiWrongLength(v.len()))?;
173                    let prefix = u8::try_from(prefix).map_err(|_| OpcodeTomlError::MultiOutOfRange {
174                        prefix,
175                        index,
176                    })?;
177                    let index = u32::try_from(index).map_err(|_| OpcodeTomlError::MultiOutOfRange {
178                        prefix: u64::from(prefix),
179                        index,
180                    })?;
181                    Ok(Self::Multi(prefix, index))
182                }
183            }
184        }
185    }
186
187    pub(super) fn deserialize_stack_entry<'de, D>(deserializer: D) -> Result<StackEntry, D::Error>
188    where
189        D: Deserializer<'de>,
190    {
191        StackEntryRaw::deserialize(deserializer).map(Into::into)
192    }
193
194    /// Variant order matches most-specific-first for untagged matching.
195    #[derive(Debug, Clone, PartialEq, Eq, Deserialize)]
196    #[serde(untagged)]
197    enum StackEntryRaw {
198        Control(ControlFrame),
199        Unreachable(UnreachableEntry),
200        TypeOf(TypeOfEntry),
201        TypesOf(TypesOfEntry),
202        Type(TypeEntry),
203    }
204
205    #[derive(Debug, Clone, PartialEq, Eq, Deserialize)]
206    struct TypeEntry {
207        #[serde(rename = "type")]
208        ty: TypeExpr,
209    }
210
211    #[derive(Debug, Clone, PartialEq, Eq, Deserialize)]
212    struct TypeOfEntry {
213        #[serde(rename = "type-of")]
214        expr: TypeExpr,
215    }
216
217    #[derive(Debug, Clone, PartialEq, Eq, Deserialize)]
218    struct TypesOfEntry {
219        #[serde(rename = "types-of")]
220        expr: TypeExpr,
221    }
222
223    #[derive(Debug, Clone, PartialEq, Eq, Deserialize)]
224    struct UnreachableEntry {
225        #[serde(
226            rename = "unreachable",
227            deserialize_with = "deserialize_unreachable_true"
228        )]
229        _marker: (),
230    }
231
232    impl From<StackEntryRaw> for StackEntry {
233        fn from(raw: StackEntryRaw) -> Self {
234            match raw {
235                StackEntryRaw::Control(frame) => StackEntry::Control(frame),
236                StackEntryRaw::Unreachable(_) => StackEntry::Unreachable,
237                StackEntryRaw::TypeOf(e) => StackEntry::TypeOf(e.expr),
238                StackEntryRaw::TypesOf(e) => StackEntry::TypesOf(e.expr),
239                StackEntryRaw::Type(e) => StackEntry::Type(e.ty),
240            }
241        }
242    }
243
244    fn deserialize_unreachable_true<'de, D>(deserializer: D) -> Result<(), D::Error>
245    where
246        D: Deserializer<'de>,
247    {
248        if bool::deserialize(deserializer)? {
249            Ok(())
250        } else {
251            Err(de::Error::custom("unreachable must be true"))
252        }
253    }
254}
255
256/// Validation error for stack-type invariants not expressible in the type system.
257#[derive(Debug, Clone, PartialEq, Eq)]
258pub enum ValidateError {
259    ControlLabelMismatch {
260        control: ControlKind,
261        label: LabelTarget,
262    },
263}
264
265/// Check control-frame label invariants (`loop` → `Start`, `block`/`if` → `End`).
266///
267/// # Errors
268///
269/// Returns [`ValidateError::ControlLabelMismatch`] when a control frame's `label`
270/// does not match its `control` kind.
271pub fn validate_stack_entry(entry: &StackEntry) -> Result<(), ValidateError> {
272    let StackEntry::Control(frame) = entry else {
273        return Ok(());
274    };
275    let ok = matches!(
276        (frame.control, frame.label),
277        (ControlKind::Loop, LabelTarget::Start)
278            | (ControlKind::Block | ControlKind::If, LabelTarget::End)
279    );
280    if ok {
281        Ok(())
282    } else {
283        Err(ValidateError::ControlLabelMismatch {
284            control: frame.control,
285            label: frame.label,
286        })
287    }
288}
289
290/// Validate every stack entry in the table.
291///
292/// # Errors
293///
294/// Returns the first [`ValidateError`] produced by [`validate_stack_entry`].
295pub fn validate_instructions_table(table: &InstructionsTable) -> Result<(), ValidateError> {
296    for instruction in &table.instructions {
297        if let Some(stack) = &instruction.stack_type {
298            for entry in stack.from.iter().chain(&stack.to) {
299                validate_stack_entry(entry)?;
300            }
301        }
302    }
303    Ok(())
304}
305
306/// Parse TOML source into an [`InstructionsTable`].
307///
308/// # Errors
309///
310/// Returns a TOML deserialization error when `source` is invalid or does not match the schema.
311pub fn parse_instructions_toml(source: &str) -> Result<InstructionsTable, toml::de::Error> {
312    toml::from_str(source)
313}
314
/// Compile-time embedded copy of `instructions.toml`, parsed on first use.
#[cfg(feature = "instructions-toml")]
mod embedded {
    use std::sync::OnceLock;

    use super::{InstructionsTable, parse_instructions_toml};

    /// Text of `instructions.toml` from the package root, included at compile time.
    pub const INSTRUCTIONS_TOML: &str =
        include_str!(concat!(env!("CARGO_MANIFEST_DIR"), "/instructions.toml"));

    /// Shared instruction table parsed from [`INSTRUCTIONS_TOML`].
    ///
    /// The first call parses the embedded text and caches the result; later calls return
    /// the cached table. Only schema parsing is performed here.
    ///
    /// # Panics
    ///
    /// Panics on the first call if the embedded `instructions.toml` does not match the
    /// schema.
    pub fn instructions() -> &'static InstructionsTable {
        static TABLE: OnceLock<InstructionsTable> = OnceLock::new();

        /// One-shot initializer for `TABLE`.
        fn parse_embedded() -> InstructionsTable {
            parse_instructions_toml(INSTRUCTIONS_TOML)
                .expect("embedded instructions.toml must match schema")
        }

        TABLE.get_or_init(parse_embedded)
    }
}
340
341#[cfg(feature = "instructions-toml")]
342pub use embedded::{INSTRUCTIONS_TOML, instructions};