Skip to main content

bsv_script/
chunk.rs

//! Script chunk parsing and encoding.
//!
//! A script chunk is either an opcode or a data push with its associated bytes.
//! This module handles decoding raw script bytes into structured chunks and
//! encoding push data with the correct length prefix (a direct length byte for
//! payloads of up to 75 bytes, otherwise an OP_PUSHDATA1/2/4 header).
6
7use crate::opcodes::*;
8use crate::ScriptError;
9
/// One decoded element of a Bitcoin script.
///
/// A chunk pairs an opcode byte with an optional payload: bare opcodes
/// (such as OP_DUP) carry no payload, while data pushes keep the pushed
/// bytes alongside the opcode that introduced them.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct ScriptChunk {
    /// Opcode byte of this chunk. For direct pushes (1-75 bytes) the opcode
    /// value is the payload length itself.
    pub op: u8,
    /// Payload bytes attached to this chunk, or `None` when it has none.
    pub data: Option<Vec<u8>>,
}
21
22impl ScriptChunk {
23    /// Convert this chunk to its ASM string representation.
24    ///
25    /// Data push chunks are rendered as hex strings; non-push opcodes use
26    /// their canonical OP_xxx name.
27    ///
28    /// # Returns
29    /// A string suitable for inclusion in a space-separated ASM output.
30    pub fn to_asm_string(&self) -> String {
31        if self.op > OP_0 && self.op <= OP_PUSHDATA4 {
32            if let Some(ref data) = self.data {
33                return hex::encode(data);
34            }
35        }
36        opcode_to_string(self.op).to_string()
37    }
38}
39
40/// Decode raw script bytes into a vector of `ScriptChunk` values.
41///
42/// Handles OP_DATA_1..OP_DATA_75 (direct push), OP_PUSHDATA1/2/4
43/// (extended push), and OP_RETURN (consumes remaining bytes as data
44/// unless inside a conditional block).
45///
46/// # Arguments
47/// * `bytes` - The raw script bytes to decode.
48///
49/// # Returns
50/// A vector of parsed chunks, or a `ScriptError` if the data is truncated.
51pub fn decode_script(bytes: &[u8]) -> Result<Vec<ScriptChunk>, ScriptError> {
52    let mut chunks = Vec::new();
53    let mut pos = 0;
54    let mut conditional_block: i32 = 0;
55
56    while pos < bytes.len() {
57        let op = bytes[pos];
58
59        match op {
60            OP_IF | OP_NOTIF | OP_VERIF | OP_VERNOTIF => {
61                conditional_block += 1;
62                chunks.push(ScriptChunk { op, data: None });
63                pos += 1;
64            }
65            OP_ENDIF => {
66                conditional_block -= 1;
67                chunks.push(ScriptChunk { op, data: None });
68                pos += 1;
69            }
70            OP_RETURN => {
71                if conditional_block > 0 {
72                    chunks.push(ScriptChunk { op, data: None });
73                    pos += 1;
74                } else {
75                    // Consume the rest of the script as data attached to OP_RETURN.
76                    let data = bytes[pos..].to_vec();
77                    chunks.push(ScriptChunk { op, data: Some(data) });
78                    pos = bytes.len();
79                }
80            }
81            OP_PUSHDATA1 => {
82                if bytes.len() < pos + 2 {
83                    return Err(ScriptError::DataTooSmall);
84                }
85                let length = bytes[pos + 1] as usize;
86                pos += 2;
87                if bytes.len() < pos + length {
88                    return Err(ScriptError::DataTooSmall);
89                }
90                let data = bytes[pos..pos + length].to_vec();
91                chunks.push(ScriptChunk { op, data: Some(data) });
92                pos += length;
93            }
94            OP_PUSHDATA2 => {
95                if bytes.len() < pos + 3 {
96                    return Err(ScriptError::DataTooSmall);
97                }
98                let length = u16::from_le_bytes([bytes[pos + 1], bytes[pos + 2]]) as usize;
99                pos += 3;
100                if bytes.len() < pos + length {
101                    return Err(ScriptError::DataTooSmall);
102                }
103                let data = bytes[pos..pos + length].to_vec();
104                chunks.push(ScriptChunk { op, data: Some(data) });
105                pos += length;
106            }
107            OP_PUSHDATA4 => {
108                if bytes.len() < pos + 5 {
109                    return Err(ScriptError::DataTooSmall);
110                }
111                let length = u32::from_le_bytes([
112                    bytes[pos + 1],
113                    bytes[pos + 2],
114                    bytes[pos + 3],
115                    bytes[pos + 4],
116                ]) as usize;
117                pos += 5;
118                if bytes.len() < pos + length {
119                    return Err(ScriptError::DataTooSmall);
120                }
121                let data = bytes[pos..pos + length].to_vec();
122                chunks.push(ScriptChunk { op, data: Some(data) });
123                pos += length;
124            }
125            0x01..=0x4b => {
126                // Direct push: op byte is the number of bytes to push.
127                let length = op as usize;
128                if bytes.len() < pos + 1 + length {
129                    return Err(ScriptError::DataTooSmall);
130                }
131                let data = bytes[pos + 1..pos + 1 + length].to_vec();
132                chunks.push(ScriptChunk { op, data: Some(data) });
133                pos += 1 + length;
134            }
135            _ => {
136                chunks.push(ScriptChunk { op, data: None });
137                pos += 1;
138            }
139        }
140    }
141
142    Ok(chunks)
143}
144
145/// Compute the OP_PUSHDATA prefix bytes for a data payload of the given length.
146///
147/// Returns the prefix that should be prepended to the data when encoding
148/// a push operation into raw script bytes.
149///
150/// # Arguments
151/// * `data_len` - The length of the data to be pushed.
152///
153/// # Returns
154/// A byte vector containing the appropriate prefix, or an error if the data
155/// is too large for the protocol.
156pub fn push_data_prefix(data_len: usize) -> Result<Vec<u8>, ScriptError> {
157    if data_len <= 75 {
158        Ok(vec![data_len as u8])
159    } else if data_len <= 0xFF {
160        Ok(vec![OP_PUSHDATA1, data_len as u8])
161    } else if data_len <= 0xFFFF {
162        let mut buf = vec![OP_PUSHDATA2];
163        buf.extend_from_slice(&(data_len as u16).to_le_bytes());
164        Ok(buf)
165    } else if data_len <= 0xFFFFFFFF {
166        let mut buf = vec![OP_PUSHDATA4];
167        buf.extend_from_slice(&(data_len as u32).to_le_bytes());
168        Ok(buf)
169    } else {
170        Err(ScriptError::DataTooBig)
171    }
172}
173
174/// Encode multiple data payloads into a single byte vector with push prefixes.
175///
176/// Each element in `parts` gets its own OP_PUSHDATA prefix based on length.
177///
178/// # Arguments
179/// * `parts` - Slice of data byte slices to encode.
180///
181/// # Returns
182/// A byte vector containing all pushes concatenated, or an error if any
183/// part is too large.
184pub fn encode_push_datas(parts: &[&[u8]]) -> Result<Vec<u8>, ScriptError> {
185    let mut result = Vec::new();
186    for (i, part) in parts.iter().enumerate() {
187        let prefix = push_data_prefix(part.len())
188            .map_err(|_| ScriptError::PartTooBig(i))?;
189        result.extend_from_slice(&prefix);
190        result.extend_from_slice(part);
191    }
192    Ok(result)
193}
194
#[cfg(test)]
mod tests {
    //! Tests for script chunk decoding and push data encoding.
    //!
    //! Covers decode_script with simple, complex, and malformed inputs,
    //! push_data_prefix boundary sizes, encode_push_datas roundtrips,
    //! and OP_PUSHDATA1/2/4 error cases. Test vectors are derived from
    //! the Go SDK reference implementation.

    use super::*;

    /// Assert that decoding `bytes` fails specifically with `DataTooSmall`.
    fn assert_data_too_small(bytes: &[u8]) {
        assert!(
            matches!(decode_script(bytes), Err(ScriptError::DataTooSmall)),
            "expected DataTooSmall for script {:02x?}",
            bytes
        );
    }

    // -----------------------------------------------------------------------
    // decode_script - basic cases
    // -----------------------------------------------------------------------

    /// Decode a script with three simple push chunks and verify their contents.
    #[test]
    fn test_decode_script_simple() {
        let script_hex = "05000102030401FF02ABCD";
        let bytes = hex::decode(script_hex).expect("valid hex");
        let parts = decode_script(&bytes).expect("should decode");
        assert_eq!(
            parts,
            vec![
                ScriptChunk { op: 0x05, data: Some(vec![0x00, 0x01, 0x02, 0x03, 0x04]) },
                ScriptChunk { op: 0x01, data: Some(vec![0xFF]) },
                ScriptChunk { op: 0x02, data: Some(vec![0xAB, 0xCD]) },
            ]
        );
    }

    /// Decode and re-encode a simple script to verify roundtrip fidelity.
    #[test]
    fn test_decode_and_encode_roundtrip() {
        let script_hex = "05000102030401FF02ABCD";
        let bytes = hex::decode(script_hex).expect("valid hex");
        let parts = decode_script(&bytes).expect("should decode");
        assert_eq!(parts.len(), 3);

        // Re-encode: gather the data from each chunk
        let data_parts: Vec<&[u8]> = parts
            .iter()
            .filter_map(|p| p.data.as_deref())
            .collect();
        let encoded = encode_push_datas(&data_parts).expect("should encode");
        assert_eq!(encoded, bytes);
        assert_eq!(hex::encode(&encoded), script_hex.to_lowercase());
    }

    /// Decode an empty byte slice returns an empty chunk vector.
    #[test]
    fn test_decode_script_empty() {
        let parts = decode_script(&[]).expect("should decode");
        assert!(parts.is_empty());
    }

    /// Decode a complex multisig-like script with OP_PUSHDATA1 chunks.
    #[test]
    fn test_decode_script_complex() {
        let script_hex = "524c53ff0488b21e000000000000000000362f7a9030543db8751401c387d6a71e870f1895b3a62569d455e8ee5f5f5e5f03036624c6df96984db6b4e625b6707c017eb0e0d137cd13a0c989bfa77a4473fd000000004c53ff0488b21e0000000000000000008b20425398995f3c866ea6ce5c1828a516b007379cf97b136bffbdc86f75df14036454bad23b019eae34f10aff8b8d6d8deb18cb31354e5a169ee09d8a4560e8250000000052ae";
        let bytes = hex::decode(script_hex).expect("valid hex");
        let parts = decode_script(&bytes).expect("should decode");
        assert_eq!(parts.len(), 5);

        // Layout: 0x52, two 0x53-byte OP_PUSHDATA1 pushes, 0x52, 0xae.
        let ops: Vec<u8> = parts.iter().map(|chunk| chunk.op).collect();
        assert_eq!(ops, vec![0x52, OP_PUSHDATA1, OP_PUSHDATA1, 0x52, 0xae]);
        let payload_lens: Vec<Option<usize>> = parts
            .iter()
            .map(|chunk| chunk.data.as_ref().map(Vec::len))
            .collect();
        assert_eq!(payload_lens, vec![None, Some(0x53), Some(0x53), None, None]);
    }

    /// Top-level OP_RETURN swallows the rest of the script, itself included.
    #[test]
    fn test_decode_script_op_return_consumes_rest() {
        let bytes = [OP_RETURN, 0x01, 0x02];
        let parts = decode_script(&bytes).expect("should decode");
        assert_eq!(
            parts,
            vec![ScriptChunk { op: OP_RETURN, data: Some(bytes.to_vec()) }]
        );
    }

    /// OP_RETURN inside a conditional block is decoded as a bare opcode.
    #[test]
    fn test_decode_script_op_return_inside_conditional() {
        let parts = decode_script(&[OP_IF, OP_RETURN, OP_ENDIF]).expect("should decode");
        assert_eq!(
            parts,
            vec![
                ScriptChunk { op: OP_IF, data: None },
                ScriptChunk { op: OP_RETURN, data: None },
                ScriptChunk { op: OP_ENDIF, data: None },
            ]
        );
    }

    // -----------------------------------------------------------------------
    // decode_script - error / truncation cases
    // -----------------------------------------------------------------------

    /// Verify that a truncated direct-push script returns DataTooSmall.
    #[test]
    fn test_decode_script_bad_parts() {
        // 0x05 says "push 5 bytes" but only 3 bytes follow
        let bytes = hex::decode("05000000").expect("valid hex");
        assert_data_too_small(&bytes);
    }

    /// Verify that a truncated OP_PUSHDATA1 script returns DataTooSmall.
    #[test]
    fn test_decode_script_invalid_pushdata1() {
        // OP_PUSHDATA1 = 0x4c, length byte claims 5 bytes but only 3 follow
        let bytes = hex::decode("4c05000000").expect("valid hex");
        assert_data_too_small(&bytes);
    }

    /// Verify OP_PUSHDATA1 with a valid data payload decodes correctly.
    #[test]
    fn test_decode_script_pushdata1_valid() {
        let data = b"testing";
        let mut script_bytes = vec![OP_PUSHDATA1, data.len() as u8];
        script_bytes.extend_from_slice(data);
        let parts = decode_script(&script_bytes).expect("should decode");
        assert_eq!(parts.len(), 1);
        assert_eq!(parts[0].op, OP_PUSHDATA1);
        assert_eq!(parts[0].data.as_ref().unwrap(), data);
    }

    /// Verify OP_PUSHDATA1 alone (no length byte) returns DataTooSmall.
    #[test]
    fn test_decode_script_pushdata1_missing_payload() {
        assert_data_too_small(&[OP_PUSHDATA1]);
    }

    /// Verify OP_PUSHDATA2 alone returns DataTooSmall.
    #[test]
    fn test_decode_script_pushdata2_missing_payload() {
        assert_data_too_small(&[OP_PUSHDATA2]);
    }

    /// Verify OP_PUSHDATA2 with insufficient data returns DataTooSmall.
    #[test]
    fn test_decode_script_pushdata2_too_small() {
        let data = b"testing PUSHDATA2";
        let mut script_bytes = vec![OP_PUSHDATA2, data.len() as u8];
        script_bytes.extend_from_slice(data);
        // Only 1 length byte was written, so the decoder reads the first data
        // byte as the high length byte and the claimed length exceeds the input.
        assert_data_too_small(&script_bytes);
    }

    /// Verify OP_PUSHDATA4 alone returns DataTooSmall.
    #[test]
    fn test_decode_script_pushdata4_missing_payload() {
        assert_data_too_small(&[OP_PUSHDATA4]);
    }

    /// Verify OP_PUSHDATA4 with insufficient data returns DataTooSmall.
    #[test]
    fn test_decode_script_pushdata4_too_small() {
        let data = b"testing PUSHDATA4";
        let mut script_bytes = vec![OP_PUSHDATA4, data.len() as u8];
        script_bytes.extend_from_slice(data);
        // Only 1 length byte was written instead of 4, so the first three data
        // bytes are read as length bytes and the claimed length exceeds the input.
        assert_data_too_small(&script_bytes);
    }

    // -----------------------------------------------------------------------
    // push_data_prefix boundary tests
    // -----------------------------------------------------------------------

    /// Verify push_data_prefix returns a 1-byte prefix for data <= 75 bytes.
    #[test]
    fn test_push_data_prefix_small() {
        let prefix = push_data_prefix(20).expect("should succeed");
        assert_eq!(prefix, vec![20u8]);
    }

    /// Verify push_data_prefix returns a 1-byte prefix at the 75-byte boundary.
    #[test]
    fn test_push_data_prefix_75() {
        let prefix = push_data_prefix(75).expect("should succeed");
        assert_eq!(prefix, vec![75u8]);
    }

    /// Verify push_data_prefix returns OP_PUSHDATA1 prefix for 76..=255 bytes.
    #[test]
    fn test_push_data_prefix_pushdata1() {
        let prefix = push_data_prefix(76).expect("should succeed");
        assert_eq!(prefix, vec![OP_PUSHDATA1, 76]);
    }

    /// Verify push_data_prefix returns OP_PUSHDATA1 prefix at the 255-byte boundary.
    #[test]
    fn test_push_data_prefix_255() {
        let prefix = push_data_prefix(255).expect("should succeed");
        assert_eq!(prefix, vec![OP_PUSHDATA1, 255]);
    }

    /// Verify push_data_prefix returns OP_PUSHDATA2 prefix for 256..=65535 bytes.
    #[test]
    fn test_push_data_prefix_pushdata2() {
        let prefix = push_data_prefix(256).expect("should succeed");
        assert_eq!(prefix, vec![OP_PUSHDATA2, 0x00, 0x01]);
    }

    /// Verify push_data_prefix returns OP_PUSHDATA2 prefix at the 65535-byte boundary.
    #[test]
    fn test_push_data_prefix_65535() {
        let prefix = push_data_prefix(65535).expect("should succeed");
        assert_eq!(prefix, vec![OP_PUSHDATA2, 0xFF, 0xFF]);
    }

    /// Verify push_data_prefix returns OP_PUSHDATA4 prefix for 65536+ bytes.
    #[test]
    fn test_push_data_prefix_pushdata4() {
        let prefix = push_data_prefix(65536).expect("should succeed");
        assert_eq!(prefix, vec![OP_PUSHDATA4, 0x00, 0x00, 0x01, 0x00]);
    }

    // -----------------------------------------------------------------------
    // encode_push_datas
    // -----------------------------------------------------------------------

    /// Verify encode_push_datas concatenates multiple pushes correctly.
    #[test]
    fn test_encode_push_datas_multiple() {
        let parts: Vec<&[u8]> = vec![b"hello", b"world"];
        let encoded = encode_push_datas(&parts).expect("should encode");
        // "hello" is 5 bytes -> prefix 0x05, "world" is 5 bytes -> prefix 0x05
        let expected = hex::decode("0568656c6c6f05776f726c64").expect("valid hex");
        assert_eq!(encoded, expected);
    }

    /// Verify encode_push_datas with an empty parts list returns empty bytes.
    #[test]
    fn test_encode_push_datas_empty() {
        let parts: Vec<&[u8]> = vec![];
        let encoded = encode_push_datas(&parts).expect("should encode");
        assert!(encoded.is_empty());
    }

    // -----------------------------------------------------------------------
    // ScriptChunk::to_asm_string
    // -----------------------------------------------------------------------

    /// Verify that a data-push chunk renders as hex in ASM output.
    #[test]
    fn test_chunk_to_asm_string_data() {
        let chunk = ScriptChunk {
            op: OP_DATA_20,
            data: Some(vec![0xAB; 20]),
        };
        let asm = chunk.to_asm_string();
        assert_eq!(asm, "ab".repeat(20));
    }

    /// Verify that a non-push opcode chunk renders as its OP_xxx name.
    #[test]
    fn test_chunk_to_asm_string_opcode() {
        let chunk = ScriptChunk {
            op: OP_DUP,
            data: None,
        };
        assert_eq!(chunk.to_asm_string(), "OP_DUP");
    }
}