// bsv_script/interpreter/parsed_opcode.rs

1//! Parsed opcode representation and script parser.
2
3use crate::opcodes::*;
4use crate::Script;
5use super::error::{InterpreterError, InterpreterErrorCode};
6
/// A single script instruction: the opcode byte together with any bytes it carries.
///
/// Two values compare equal when both the opcode and the payload match, which
/// lets whole parsed scripts be compared with `==` / `assert_eq!`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParsedOpcode {
    /// The opcode byte value.
    pub opcode: u8,
    /// The data payload associated with push opcodes (empty for non-push opcodes).
    pub data: Vec<u8>,
}
15
16impl ParsedOpcode {
17    /// Return the human-readable name of this opcode.
18    pub fn name(&self) -> &'static str {
19        crate::opcodes::opcode_to_string(self.opcode)
20    }
21
22    /// Return true if this opcode is disabled (OP_2MUL, OP_2DIV).
23    pub fn is_disabled(&self) -> bool {
24        matches!(self.opcode, OP_2MUL | OP_2DIV)
25    }
26
27    /// Return true if this opcode is always illegal (OP_VERIF, OP_VERNOTIF).
28    pub fn always_illegal(&self) -> bool {
29        matches!(self.opcode, OP_VERIF | OP_VERNOTIF)
30    }
31
32    /// Return true if this opcode is a conditional flow control opcode.
33    pub fn is_conditional(&self) -> bool {
34        matches!(
35            self.opcode,
36            OP_IF | OP_NOTIF | OP_ELSE | OP_ENDIF | OP_VERIF | OP_VERNOTIF
37        )
38    }
39
40    /// Return true if this opcode requires a transaction context to execute.
41    pub fn requires_tx(&self) -> bool {
42        matches!(
43            self.opcode,
44            OP_CHECKSIG
45                | OP_CHECKSIGVERIFY
46                | OP_CHECKMULTISIG
47                | OP_CHECKMULTISIGVERIFY
48                | OP_CHECKSEQUENCEVERIFY
49        )
50    }
51
52    /// Check that push uses minimal encoding.
53    pub fn enforce_minimum_data_push(&self) -> Result<(), InterpreterError> {
54        let data_len = self.data.len();
55        if data_len == 0 && self.opcode != OP_0 {
56            return Err(InterpreterError::new(
57                InterpreterErrorCode::MinimalData,
58                format!(
59                    "zero length data push is encoded with opcode {} instead of OP_0",
60                    self.name()
61                ),
62            ));
63        }
64        if data_len == 1 && (1..=16).contains(&self.data[0]) && self.opcode != OP_1 + self.data[0] - 1 {
65            return Err(InterpreterError::new(
66                InterpreterErrorCode::MinimalData,
67                format!(
68                    "data push of the value {} encoded with opcode {} instead of OP_{}",
69                    self.data[0],
70                    self.name(),
71                    self.data[0]
72                ),
73            ));
74        }
75        if data_len == 1 && self.data[0] == 0x81 && self.opcode != OP_1NEGATE {
76            return Err(InterpreterError::new(
77                InterpreterErrorCode::MinimalData,
78                format!(
79                    "data push of the value -1 encoded with opcode {} instead of OP_1NEGATE",
80                    self.name()
81                ),
82            ));
83        }
84        if data_len <= 75 {
85            if self.opcode as usize != data_len {
86                return Err(InterpreterError::new(
87                    InterpreterErrorCode::MinimalData,
88                    format!(
89                        "data push of {} bytes encoded with opcode {} instead of OP_DATA_{}",
90                        data_len,
91                        self.name(),
92                        data_len
93                    ),
94                ));
95            }
96        } else if data_len <= 255 {
97            if self.opcode != OP_PUSHDATA1 {
98                return Err(InterpreterError::new(
99                    InterpreterErrorCode::MinimalData,
100                    format!(
101                        "data push of {} bytes encoded with opcode {} instead of OP_PUSHDATA1",
102                        data_len,
103                        self.name()
104                    ),
105                ));
106            }
107        } else if data_len <= 65535 && self.opcode != OP_PUSHDATA2 {
108            return Err(InterpreterError::new(
109                InterpreterErrorCode::MinimalData,
110                format!(
111                    "data push of {} bytes encoded with opcode {} instead of OP_PUSHDATA2",
112                    data_len,
113                    self.name()
114                ),
115            ));
116        }
117        Ok(())
118    }
119
120    /// Check if this is a canonical push (matches the smallest push opcode).
121    pub fn canonical_push(&self) -> bool {
122        let opcode = self.opcode;
123        let data = &self.data;
124        let data_len = data.len();
125        if opcode > OP_16 {
126            return true;
127        }
128        if opcode < OP_PUSHDATA1 && opcode > OP_0 && data_len == 1 && data[0] <= 16 {
129            return false;
130        }
131        if opcode == OP_PUSHDATA1 && data_len < OP_PUSHDATA1 as usize {
132            return false;
133        }
134        if opcode == OP_PUSHDATA2 && data_len <= 0xff {
135            return false;
136        }
137        if opcode == OP_PUSHDATA4 && data_len <= 0xffff {
138            return false;
139        }
140        true
141    }
142
143    /// Serialize back to script bytes.
144    pub fn to_bytes(&self) -> Vec<u8> {
145        let mut out = vec![self.opcode];
146        if self.opcode == 0 || (self.opcode >= OP_1NEGATE && self.opcode <= OP_16) || self.opcode > OP_PUSHDATA4 {
147            // No data for these opcodes (except OP_RETURN which has special handling)
148            if self.opcode == OP_RETURN && !self.data.is_empty() {
149                out.extend_from_slice(&self.data);
150            }
151            return out;
152        }
153        // Push data opcodes
154        match self.opcode {
155            OP_PUSHDATA1 => {
156                out.push(self.data.len() as u8);
157                out.extend_from_slice(&self.data);
158            }
159            OP_PUSHDATA2 => {
160                out.extend_from_slice(&(self.data.len() as u16).to_le_bytes());
161                out.extend_from_slice(&self.data);
162            }
163            OP_PUSHDATA4 => {
164                out.extend_from_slice(&(self.data.len() as u32).to_le_bytes());
165                out.extend_from_slice(&self.data);
166            }
167            _ => {
168                // OP_DATA_1..OP_DATA_75
169                out.extend_from_slice(&self.data);
170            }
171        }
172        out
173    }
174}
175
176/// A parsed script is a sequence of parsed opcodes.
177pub type ParsedScript = Vec<ParsedOpcode>;
178
179/// Check if a parsed script is push-only.
180pub fn is_push_only(script: &ParsedScript) -> bool {
181    script.iter().all(|op| op.opcode <= OP_16)
182}
183
184/// Remove opcodes that push the given data.
185pub fn remove_opcode_by_data(script: &ParsedScript, data: &[u8]) -> ParsedScript {
186    script
187        .iter()
188        .filter(|pop| !pop.canonical_push() || !pop.data.windows(data.len()).any(|w| w == data))
189        .cloned()
190        .collect()
191}
192
193/// Remove all occurrences of a specific opcode.
194pub fn remove_opcode(script: &ParsedScript, opcode: u8) -> ParsedScript {
195    script
196        .iter()
197        .filter(|pop| pop.opcode != opcode)
198        .cloned()
199        .collect()
200}
201
202/// Unparse a ParsedScript back to a Script.
203pub fn unparse(pscript: &ParsedScript) -> Script {
204    let mut bytes = Vec::new();
205    for pop in pscript {
206        bytes.extend_from_slice(&pop.to_bytes());
207    }
208    Script::from_bytes(&bytes)
209}
210
211/// Parse a Script into a ParsedScript.
212///
213/// `error_on_checksig` - if true, returns error for checksig ops (when no tx available)
214pub fn parse_script(
215    script: &Script,
216    error_on_checksig: bool,
217) -> Result<ParsedScript, InterpreterError> {
218    let scr = script.to_bytes();
219    let mut parsed_ops = Vec::new();
220    let mut conditional_depth = 0i32;
221    let mut i = 0;
222
223    while i < scr.len() {
224        let instruction = scr[i];
225        let mut parsed_op = ParsedOpcode {
226            opcode: instruction,
227            data: Vec::new(),
228        };
229
230        if error_on_checksig && parsed_op.requires_tx() {
231            return Err(InterpreterError::new(
232                InterpreterErrorCode::InvalidParams,
233                "tx and previous output must be supplied for checksig".to_string(),
234            ));
235        }
236
237        // Track conditionals and check for OP_RETURN
238        match instruction {
239            OP_IF | OP_NOTIF | OP_VERIF | OP_VERNOTIF => conditional_depth += 1,
240            OP_ENDIF => {
241                if conditional_depth > 0 {
242                    conditional_depth -= 1;
243                }
244            }
245            OP_RETURN if conditional_depth == 0 => {
246                // OP_RETURN outside conditionals: consume remaining data
247                if i + 1 < scr.len() {
248                    parsed_op.data = scr[i + 1..].to_vec();
249                }
250                parsed_ops.push(parsed_op);
251                return Ok(parsed_ops);
252            }
253            _ => {}
254        }
255
256        // Extract data for this opcode
257        match instruction {
258            OP_PUSHDATA1 => {
259                if i + 1 >= scr.len() {
260                    return Err(InterpreterError::new(
261                        InterpreterErrorCode::MalformedPush,
262                        "script truncated".to_string(),
263                    ));
264                }
265                let data_len = scr[i + 1] as usize;
266                if i + 2 + data_len > scr.len() {
267                    return Err(InterpreterError::new(
268                        InterpreterErrorCode::MalformedPush,
269                        "push data exceeds script length".to_string(),
270                    ));
271                }
272                parsed_op.data = scr[i + 2..i + 2 + data_len].to_vec();
273                i += 2 + data_len;
274            }
275            OP_PUSHDATA2 => {
276                if i + 2 >= scr.len() {
277                    return Err(InterpreterError::new(
278                        InterpreterErrorCode::MalformedPush,
279                        "script truncated".to_string(),
280                    ));
281                }
282                let data_len =
283                    u16::from_le_bytes([scr[i + 1], scr[i + 2]]) as usize;
284                if i + 3 + data_len > scr.len() {
285                    return Err(InterpreterError::new(
286                        InterpreterErrorCode::MalformedPush,
287                        "push data exceeds script length".to_string(),
288                    ));
289                }
290                parsed_op.data = scr[i + 3..i + 3 + data_len].to_vec();
291                i += 3 + data_len;
292            }
293            OP_PUSHDATA4 => {
294                if i + 4 >= scr.len() {
295                    return Err(InterpreterError::new(
296                        InterpreterErrorCode::MalformedPush,
297                        "script truncated".to_string(),
298                    ));
299                }
300                let data_len = u32::from_le_bytes([
301                    scr[i + 1],
302                    scr[i + 2],
303                    scr[i + 3],
304                    scr[i + 4],
305                ]) as usize;
306                if i + 5 + data_len > scr.len() {
307                    return Err(InterpreterError::new(
308                        InterpreterErrorCode::MalformedPush,
309                        "push data exceeds script length".to_string(),
310                    ));
311                }
312                parsed_op.data = scr[i + 5..i + 5 + data_len].to_vec();
313                i += 5 + data_len;
314            }
315            op if op >= OP_DATA_1 && op <= OP_DATA_75 => {
316                let data_len = op as usize;
317                if i + 1 + data_len > scr.len() {
318                    return Err(InterpreterError::new(
319                        InterpreterErrorCode::MalformedPush,
320                        "script truncated".to_string(),
321                    ));
322                }
323                parsed_op.data = scr[i + 1..i + 1 + data_len].to_vec();
324                i += 1 + data_len;
325            }
326            _ => {
327                // Single-byte opcode
328                i += 1;
329            }
330        }
331
332        parsed_ops.push(parsed_op);
333    }
334
335    Ok(parsed_ops)
336}