// blockchain_zc_parser/script.rs

//! Bitcoin script parsing — zero-copy, no alloc.
2
3use crate::{
4    cursor::Cursor,
5    error::{ParseError, ParseResult},
6};
7
/// Maximum standard script size (Bitcoin Core limit), in bytes.
///
/// [`Script::parse`] passes this value to `Cursor::read_var_bytes` as the
/// upper bound for the length-prefixed script it reads.
///
/// NOTE(review): Bitcoin Core enforces its 10,000-byte limit when a script is
/// executed; confirm that rejecting longer (unspendable) output scripts at
/// parse time is the intended behavior for this parser.
pub const MAX_SCRIPT_SIZE: usize = 10_000;
10
/// A raw locking or unlocking script.
///
/// This is a thin, `Copy` view over the script bytes: all bytes borrow from
/// the original parse buffer for the lifetime `'a`, and nothing is copied or
/// allocated.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Script<'a> {
    /// The raw opcodes + push-data bytes.
    pub bytes: &'a [u8],
}
19
20impl<'a> Script<'a> {
21    /// Parse a length-prefixed script from `cursor`.
22    #[inline]
23    pub fn parse(cursor: &mut Cursor<'a>) -> ParseResult<Self> {
24        let bytes = cursor.read_var_bytes(MAX_SCRIPT_SIZE)?;
25        Ok(Self { bytes })
26    }
27
28    /// Classify the script into a well-known output type.
29    #[inline]
30    pub fn script_type(&self) -> ScriptType<'a> {
31        ScriptType::classify(self.bytes)
32    }
33
34    /// Length of the script in bytes.
35    #[inline]
36    pub fn len(&self) -> usize {
37        self.bytes.len()
38    }
39
40    /// `true` for an empty (OP_0 / bare) script.
41    #[inline]
42    pub fn is_empty(&self) -> bool {
43        self.bytes.is_empty()
44    }
45
46    /// Iterate over parsed opcodes / push-data in this script.
47    pub fn instructions(&self) -> Instructions<'a> {
48        Instructions {
49            data: self.bytes,
50            pos: 0,
51            errored: false,
52        }
53    }
54}
55
// ---------------------------------------------------------------------------
// Script classification
// ---------------------------------------------------------------------------

/// Known standard output script types.
///
/// Classification is purely structural: the script bytes are compared with
/// each template and no policy limits or key validity checks are applied.
/// Variants that carry data borrow it from the classified script.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ScriptType<'a> {
    /// Pay-to-Public-Key-Hash: OP_DUP OP_HASH160 <20-byte hash> OP_EQUALVERIFY OP_CHECKSIG
    P2PKH {
        /// RIPEMD-160(SHA-256(pubkey)) hash embedded in the script.
        pubkey_hash: &'a [u8; 20],
    },
    /// Pay-to-Script-Hash: OP_HASH160 <20-byte hash> OP_EQUAL
    P2SH {
        /// RIPEMD-160(SHA-256(redeem_script)) hash embedded in the script.
        script_hash: &'a [u8; 20],
    },
    /// Pay-to-Witness-Public-Key-Hash: OP_0 <20-byte hash>
    P2WPKH {
        /// Witness program: RIPEMD-160(SHA-256(pubkey)).
        pubkey_hash: &'a [u8; 20],
    },
    /// Pay-to-Witness-Script-Hash: OP_0 <32-byte hash>
    P2WSH {
        /// Witness program: SHA-256(witness_script).
        script_hash: &'a [u8; 32],
    },
    /// Pay-to-Taproot: OP_1 <32-byte x-only pubkey>
    P2TR {
        /// Taproot output key (x-only, 32 bytes).
        x_only_pubkey: &'a [u8; 32],
    },
    /// Pay-to-Public-Key: <compressed/uncompressed pubkey> OP_CHECKSIG
    P2PK {
        /// Raw public key bytes (33 bytes compressed or 65 bytes uncompressed).
        pubkey: &'a [u8],
    },
    /// OP_RETURN / provably unspendable
    OpReturn {
        /// Everything following the OP_RETURN opcode, including any push
        /// opcodes that precede the payload.
        data: &'a [u8],
    },
    /// Multisig: OP_m <pubkeys…> OP_n OP_CHECKMULTISIG
    ///
    /// Recognized when `1 <= m <= n <= 16` and exactly `n` keys are pushed,
    /// each as a direct 33-byte or 65-byte push.
    Multisig {
        /// Minimum number of signatures required to spend.
        required: u8,
        /// Total number of public keys listed.
        total: u8,
    },
    /// Anything else (non-standard / complex).
    Unknown,
}

impl<'a> ScriptType<'a> {
    /// Classify raw script bytes by comparing them with each known template.
    ///
    /// Fixed-layout templates are tried first; bare multisig, which has a
    /// variable length, is tried last. Returns [`ScriptType::Unknown`] when
    /// nothing matches (including for an empty script).
    fn classify(b: &'a [u8]) -> Self {
        match b {
            // P2PKH  25 bytes
            [0x76, 0xa9, 0x14, hash @ .., 0x88, 0xac] if hash.len() == 20 => ScriptType::P2PKH {
                pubkey_hash: hash.try_into().expect("guard checked 20 bytes"),
            },
            // P2SH  23 bytes
            [0xa9, 0x14, hash @ .., 0x87] if hash.len() == 20 => ScriptType::P2SH {
                script_hash: hash.try_into().expect("guard checked 20 bytes"),
            },
            // P2WPKH  22 bytes
            [0x00, 0x14, hash @ ..] if hash.len() == 20 => ScriptType::P2WPKH {
                pubkey_hash: hash.try_into().expect("guard checked 20 bytes"),
            },
            // P2WSH  34 bytes
            [0x00, 0x20, hash @ ..] if hash.len() == 32 => ScriptType::P2WSH {
                script_hash: hash.try_into().expect("guard checked 32 bytes"),
            },
            // P2TR  34 bytes  (OP_1 = 0x51, push 32 = 0x20)
            [0x51, 0x20, pk @ ..] if pk.len() == 32 => ScriptType::P2TR {
                x_only_pubkey: pk.try_into().expect("guard checked 32 bytes"),
            },
            // P2PK compressed  35 bytes
            [0x21, pk @ .., 0xac] if pk.len() == 33 => ScriptType::P2PK { pubkey: pk },
            // P2PK uncompressed  67 bytes
            [0x41, pk @ .., 0xac] if pk.len() == 65 => ScriptType::P2PK { pubkey: pk },
            // OP_RETURN
            [0x6a, rest @ ..] => ScriptType::OpReturn { data: rest },
            // Bare multisig has no fixed length, so it gets a dedicated matcher.
            _ => match Self::match_multisig(b) {
                Some((required, total)) => ScriptType::Multisig { required, total },
                None => ScriptType::Unknown,
            },
        }
    }

    /// Match `OP_m <key>… OP_n OP_CHECKMULTISIG` and return `(m, n)`.
    ///
    /// Returns `None` unless the script is exactly that template with
    /// `1 <= m <= n <= 16` and `n` key pushes of 33 or 65 bytes. Only the key
    /// lengths are checked (as for P2PK above), not the key encoding.
    fn match_multisig(b: &[u8]) -> Option<(u8, u8)> {
        /// Decode OP_1..=OP_16 (0x51..=0x60) into 1..=16.
        fn small_int(op: u8) -> Option<u8> {
            match op {
                0x51..=0x60 => Some(op - 0x50),
                _ => None,
            }
        }

        // Peel off OP_m at the front, then OP_CHECKMULTISIG and OP_n at the back;
        // whatever is left in between must be the key pushes.
        let (&m_op, rest) = b.split_first()?;
        let (&last_op, rest) = rest.split_last()?;
        let (&n_op, mut keys) = rest.split_last()?;
        if last_op != 0xae {
            return None;
        }
        let required = small_int(m_op)?;
        let total = small_int(n_op)?;
        if required > total {
            return None;
        }

        let mut seen = 0u8;
        while let Some((&push_op, tail)) = keys.split_first() {
            let key_len = match push_op {
                0x21 => 33, // compressed key
                0x41 => 65, // uncompressed key
                _ => return None,
            };
            // Checking `seen == total` first also keeps `seen` from overflowing.
            if seen == total || tail.len() < key_len {
                return None;
            }
            keys = &tail[key_len..];
            seen += 1;
        }

        if seen == total {
            Some((required, total))
        } else {
            None
        }
    }
}
142
// ---------------------------------------------------------------------------
// Instruction iterator
// ---------------------------------------------------------------------------

/// A single decoded script instruction.
///
/// This type is `Copy` — parse errors are surfaced as `Err(ParseError)` in
/// the [`Instructions`] iterator rather than being embedded in the enum.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Instruction<'a> {
    /// A push-data instruction with the pushed bytes (may be empty for OP_0).
    ///
    /// Produced for OP_0, the direct pushes `0x01..=0x4b`, and
    /// OP_PUSHDATA1/2/4; the slice holds only the payload, without the opcode
    /// or length prefix.
    PushBytes(&'a [u8]),
    /// A non-data opcode: any opcode byte above OP_PUSHDATA4 (`0x4e`), stored
    /// as the raw byte.
    Op(u8),
}
158
/// Iterator over instructions in a [`Script`].
///
/// Yields `Ok(Instruction)` for each valid opcode/push-data and
/// `Err(ParseError)` on the first malformed byte, after which it stops.
///
/// The iterator is `Clone`, so a position can be saved and resumed later, and
/// `Debug` for diagnostics.
#[derive(Debug, Clone)]
pub struct Instructions<'a> {
    /// The complete script being decoded.
    data: &'a [u8],
    /// Offset into `data` of the next byte to decode.
    pos: usize,
    /// Becomes `true` after the first error so the iterator stops cleanly.
    errored: bool,
}
169
170impl<'a> Iterator for Instructions<'a> {
171    type Item = ParseResult<Instruction<'a>>;
172
173    fn next(&mut self) -> Option<Self::Item> {
174        if self.errored || self.pos >= self.data.len() {
175            return None;
176        }
177
178        let op = self.data[self.pos];
179        self.pos += 1;
180
181        // Resolve the number of push-data bytes that follow, or None for a
182        // plain opcode.  Returns Err if the length prefix itself is truncated.
183        let push_len: Option<Result<usize, ParseError>> = match op {
184            0x00 => Some(Ok(0)),
185            n @ 0x01..=0x4b => Some(Ok(n as usize)),
186            0x4c => {
187                // OP_PUSHDATA1: 1-byte length
188                let avail = self.data.len() - self.pos;
189                if avail < 1 {
190                    Some(Err(ParseError::UnexpectedEof {
191                        needed: 1,
192                        available: avail,
193                    }))
194                } else {
195                    let n = self.data[self.pos] as usize;
196                    self.pos += 1;
197                    Some(Ok(n))
198                }
199            }
200            0x4d => {
201                // OP_PUSHDATA2: 2-byte LE length
202                let avail = self.data.len() - self.pos;
203                if avail < 2 {
204                    Some(Err(ParseError::UnexpectedEof {
205                        needed: 2,
206                        available: avail,
207                    }))
208                } else {
209                    let n =
210                        u16::from_le_bytes([self.data[self.pos], self.data[self.pos + 1]]) as usize;
211                    self.pos += 2;
212                    Some(Ok(n))
213                }
214            }
215            0x4e => {
216                // OP_PUSHDATA4: 4-byte LE length
217                let avail = self.data.len() - self.pos;
218                if avail < 4 {
219                    Some(Err(ParseError::UnexpectedEof {
220                        needed: 4,
221                        available: avail,
222                    }))
223                } else {
224                    let n = u32::from_le_bytes([
225                        self.data[self.pos],
226                        self.data[self.pos + 1],
227                        self.data[self.pos + 2],
228                        self.data[self.pos + 3],
229                    ]) as usize;
230                    self.pos += 4;
231                    Some(Ok(n))
232                }
233            }
234            _ => None, // plain non-push opcode
235        };
236
237        match push_len {
238            None => Some(Ok(Instruction::Op(op))),
239            Some(Err(e)) => {
240                self.errored = true;
241                Some(Err(e))
242            }
243            Some(Ok(n)) => {
244                let avail = self.data.len() - self.pos;
245                if n > avail {
246                    self.errored = true;
247                    Some(Err(ParseError::UnexpectedEof {
248                        needed: n,
249                        available: avail,
250                    }))
251                } else {
252                    let bytes = &self.data[self.pos..self.pos + n];
253                    self.pos += n;
254                    Some(Ok(Instruction::PushBytes(bytes)))
255                }
256            }
257        }
258    }
259}
260
#[cfg(test)]
mod tests {
    extern crate std;
    use super::*;
    use crate::cursor::Cursor;
    use std::vec::Vec;

    /// Build the wire form of `script`: a one-byte varint length prefix
    /// followed by the script bytes. Only meant for the short scripts used
    /// in these tests.
    fn length_prefixed(script: &[u8]) -> Vec<u8> {
        let mut raw = Vec::new();
        raw.push(script.len() as u8);
        raw.extend_from_slice(script);
        raw
    }

    #[test]
    fn p2pkh_classification() {
        // 25-byte P2PKH script
        let raw = length_prefixed(&[
            0x76, 0xa9, 0x14, 0x89, 0xab, 0xcd, 0xef, 0xab, 0xba, 0xab, 0xba, 0xab, 0xba, 0xab,
            0xba, 0xab, 0xba, 0xab, 0xba, 0xab, 0xba, 0xab, 0xba, 0x88, 0xac,
        ]);

        let mut cursor = Cursor::new(&raw);
        let script = Script::parse(&mut cursor).unwrap();
        assert!(matches!(script.script_type(), ScriptType::P2PKH { .. }));
    }

    #[test]
    fn op_return_classification() {
        // OP_RETURN followed by a 4-byte push
        let raw = length_prefixed(&[0x6a, 0x04, 0xde, 0xad, 0xbe, 0xef]);

        let mut cursor = Cursor::new(&raw);
        let script = Script::parse(&mut cursor).unwrap();
        assert!(matches!(script.script_type(), ScriptType::OpReturn { .. }));
    }
}
295}