Skip to main content

bsv_script/
chunk.rs

1//! Script chunk parsing and encoding.
2//!
3//! A script chunk is either an opcode or a data push with its associated bytes.
4//! This module handles decoding raw script bytes into structured chunks and
5//! encoding push data with the correct OP_PUSHDATA prefix.
6
7use crate::opcodes::*;
8use crate::ScriptError;
9
/// One decoded element of a script: an opcode byte plus any bytes attached to it.
///
/// A standalone opcode (such as OP_DUP) has no payload, while a data push
/// keeps both the opcode byte and the bytes it pushes.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct ScriptChunk {
    /// Opcode byte of this element. A direct push (1-75 bytes) stores its
    /// payload length here.
    pub op: u8,
    /// Bytes carried by the chunk, or `None` when the opcode has no payload.
    pub data: Option<Vec<u8>>,
}
21
22impl ScriptChunk {
23    /// Convert this chunk to its ASM string representation.
24    ///
25    /// Data push chunks are rendered as hex strings; non-push opcodes use
26    /// their canonical OP_xxx name.
27    ///
28    /// # Returns
29    /// A string suitable for inclusion in a space-separated ASM output.
30    pub fn to_asm_string(&self) -> String {
31        if self.op > OP_0 && self.op <= OP_PUSHDATA4 {
32            if let Some(ref data) = self.data {
33                return hex::encode(data);
34            }
35        }
36        opcode_to_string(self.op).to_string()
37    }
38}
39
40/// Decode raw script bytes into a vector of `ScriptChunk` values.
41///
42/// Handles OP_DATA_1..OP_DATA_75 (direct push), OP_PUSHDATA1/2/4
43/// (extended push), and OP_RETURN (consumes remaining bytes as data
44/// unless inside a conditional block).
45///
46/// # Arguments
47/// * `bytes` - The raw script bytes to decode.
48///
49/// # Returns
50/// A vector of parsed chunks, or a `ScriptError` if the data is truncated.
51pub fn decode_script(bytes: &[u8]) -> Result<Vec<ScriptChunk>, ScriptError> {
52    let mut chunks = Vec::new();
53    let mut pos = 0;
54    let mut conditional_block: i32 = 0;
55
56    while pos < bytes.len() {
57        let op = bytes[pos];
58
59        match op {
60            OP_IF | OP_NOTIF | OP_VERIF | OP_VERNOTIF => {
61                conditional_block += 1;
62                chunks.push(ScriptChunk { op, data: None });
63                pos += 1;
64            }
65            OP_ENDIF => {
66                conditional_block -= 1;
67                chunks.push(ScriptChunk { op, data: None });
68                pos += 1;
69            }
70            OP_RETURN => {
71                if conditional_block > 0 {
72                    chunks.push(ScriptChunk { op, data: None });
73                    pos += 1;
74                } else {
75                    // Consume the rest of the script as data attached to OP_RETURN.
76                    let data = bytes[pos..].to_vec();
77                    chunks.push(ScriptChunk {
78                        op,
79                        data: Some(data),
80                    });
81                    pos = bytes.len();
82                }
83            }
84            OP_PUSHDATA1 => {
85                if bytes.len() < pos + 2 {
86                    return Err(ScriptError::DataTooSmall);
87                }
88                let length = bytes[pos + 1] as usize;
89                pos += 2;
90                if bytes.len() < pos + length {
91                    return Err(ScriptError::DataTooSmall);
92                }
93                let data = bytes[pos..pos + length].to_vec();
94                chunks.push(ScriptChunk {
95                    op,
96                    data: Some(data),
97                });
98                pos += length;
99            }
100            OP_PUSHDATA2 => {
101                if bytes.len() < pos + 3 {
102                    return Err(ScriptError::DataTooSmall);
103                }
104                let length = u16::from_le_bytes([bytes[pos + 1], bytes[pos + 2]]) as usize;
105                pos += 3;
106                if bytes.len() < pos + length {
107                    return Err(ScriptError::DataTooSmall);
108                }
109                let data = bytes[pos..pos + length].to_vec();
110                chunks.push(ScriptChunk {
111                    op,
112                    data: Some(data),
113                });
114                pos += length;
115            }
116            OP_PUSHDATA4 => {
117                if bytes.len() < pos + 5 {
118                    return Err(ScriptError::DataTooSmall);
119                }
120                let length = u32::from_le_bytes([
121                    bytes[pos + 1],
122                    bytes[pos + 2],
123                    bytes[pos + 3],
124                    bytes[pos + 4],
125                ]) as usize;
126                pos += 5;
127                if bytes.len() < pos + length {
128                    return Err(ScriptError::DataTooSmall);
129                }
130                let data = bytes[pos..pos + length].to_vec();
131                chunks.push(ScriptChunk {
132                    op,
133                    data: Some(data),
134                });
135                pos += length;
136            }
137            0x01..=0x4b => {
138                // Direct push: op byte is the number of bytes to push.
139                let length = op as usize;
140                if bytes.len() < pos + 1 + length {
141                    return Err(ScriptError::DataTooSmall);
142                }
143                let data = bytes[pos + 1..pos + 1 + length].to_vec();
144                chunks.push(ScriptChunk {
145                    op,
146                    data: Some(data),
147                });
148                pos += 1 + length;
149            }
150            _ => {
151                chunks.push(ScriptChunk { op, data: None });
152                pos += 1;
153            }
154        }
155    }
156
157    Ok(chunks)
158}
159
160/// Compute the OP_PUSHDATA prefix bytes for a data payload of the given length.
161///
162/// Returns the prefix that should be prepended to the data when encoding
163/// a push operation into raw script bytes.
164///
165/// # Arguments
166/// * `data_len` - The length of the data to be pushed.
167///
168/// # Returns
169/// A byte vector containing the appropriate prefix, or an error if the data
170/// is too large for the protocol.
171pub fn push_data_prefix(data_len: usize) -> Result<Vec<u8>, ScriptError> {
172    if data_len <= 75 {
173        Ok(vec![data_len as u8])
174    } else if data_len <= 0xFF {
175        Ok(vec![OP_PUSHDATA1, data_len as u8])
176    } else if data_len <= 0xFFFF {
177        let mut buf = vec![OP_PUSHDATA2];
178        buf.extend_from_slice(&(data_len as u16).to_le_bytes());
179        Ok(buf)
180    } else if data_len <= 0xFFFFFFFF {
181        let mut buf = vec![OP_PUSHDATA4];
182        buf.extend_from_slice(&(data_len as u32).to_le_bytes());
183        Ok(buf)
184    } else {
185        Err(ScriptError::DataTooBig)
186    }
187}
188
189/// Encode multiple data payloads into a single byte vector with push prefixes.
190///
191/// Each element in `parts` gets its own OP_PUSHDATA prefix based on length.
192///
193/// # Arguments
194/// * `parts` - Slice of data byte slices to encode.
195///
196/// # Returns
197/// A byte vector containing all pushes concatenated, or an error if any
198/// part is too large.
199pub fn encode_push_datas(parts: &[&[u8]]) -> Result<Vec<u8>, ScriptError> {
200    let mut result = Vec::new();
201    for (i, part) in parts.iter().enumerate() {
202        let prefix = push_data_prefix(part.len()).map_err(|_| ScriptError::PartTooBig(i))?;
203        result.extend_from_slice(&prefix);
204        result.extend_from_slice(part);
205    }
206    Ok(result)
207}
208
#[cfg(test)]
mod tests {
    //! Tests for script chunk decoding and push data encoding.
    //!
    //! Covers decode_script with simple, complex, and malformed inputs
    //! (including both OP_RETURN branches), push_data_prefix boundary sizes,
    //! encode_push_datas roundtrips, and OP_PUSHDATA1/2/4 error cases. Test
    //! vectors are derived from the Go SDK reference implementation.
    //!
    //! Truncation tests assert the specific `ScriptError::DataTooSmall`
    //! variant rather than just "some error", so a regression that fails for
    //! a different reason is not silently accepted.

    use super::*;

    // -----------------------------------------------------------------------
    // decode_script - basic cases
    // -----------------------------------------------------------------------

    /// Decode a script with three simple push chunks and verify their contents.
    #[test]
    fn test_decode_script_simple() {
        let script_hex = "05000102030401FF02ABCD";
        let bytes = hex::decode(script_hex).expect("valid hex");
        let parts = decode_script(&bytes).expect("should decode");
        assert_eq!(parts.len(), 3);
        assert_eq!(parts[0].op, 0x05);
        assert_eq!(
            parts[0].data.as_deref(),
            Some(&[0x00u8, 0x01, 0x02, 0x03, 0x04][..])
        );
        assert_eq!(parts[1].op, 0x01);
        assert_eq!(parts[1].data.as_deref(), Some(&[0xFFu8][..]));
        assert_eq!(parts[2].op, 0x02);
        assert_eq!(parts[2].data.as_deref(), Some(&[0xABu8, 0xCD][..]));
    }

    /// Decode and re-encode a simple script to verify roundtrip fidelity.
    #[test]
    fn test_decode_and_encode_roundtrip() {
        let script_hex = "05000102030401FF02ABCD";
        let bytes = hex::decode(script_hex).expect("valid hex");
        let parts = decode_script(&bytes).expect("should decode");
        assert_eq!(parts.len(), 3);

        // Re-encode: gather the data from each chunk
        let data_parts: Vec<&[u8]> = parts.iter().filter_map(|p| p.data.as_deref()).collect();
        let encoded = encode_push_datas(&data_parts).expect("should encode");
        assert_eq!(hex::encode(&encoded), script_hex.to_lowercase());
    }

    /// Decode an empty byte slice returns an empty chunk vector.
    #[test]
    fn test_decode_script_empty() {
        let parts = decode_script(&[]).expect("should decode");
        assert!(parts.is_empty());
    }

    /// Decode a complex multisig-like script with OP_PUSHDATA1 chunks.
    #[test]
    fn test_decode_script_complex() {
        let script_hex = "524c53ff0488b21e000000000000000000362f7a9030543db8751401c387d6a71e870f1895b3a62569d455e8ee5f5f5e5f03036624c6df96984db6b4e625b6707c017eb0e0d137cd13a0c989bfa77a4473fd000000004c53ff0488b21e0000000000000000008b20425398995f3c866ea6ce5c1828a516b007379cf97b136bffbdc86f75df14036454bad23b019eae34f10aff8b8d6d8deb18cb31354e5a169ee09d8a4560e8250000000052ae";
        let bytes = hex::decode(script_hex).expect("valid hex");
        let parts = decode_script(&bytes).expect("should decode");
        assert_eq!(parts.len(), 5);
        // The second and fourth chunks are the two OP_PUSHDATA1 pushes.
        assert_eq!(parts[1].op, OP_PUSHDATA1);
        assert_eq!(parts[1].data.as_ref().map(|d| d.len()), Some(0x53));
        assert_eq!(parts[3].op, OP_PUSHDATA1);
        assert_eq!(parts[3].data.as_ref().map(|d| d.len()), Some(0x53));
    }

    // -----------------------------------------------------------------------
    // decode_script - OP_RETURN handling
    // -----------------------------------------------------------------------

    /// Verify that a top-level OP_RETURN swallows the rest of the script.
    #[test]
    fn test_decode_script_op_return_consumes_remainder() {
        // The trailing 0x02 0xAB would be a truncated push if it were parsed
        // on its own; it must instead be attached to OP_RETURN verbatim.
        let bytes = [OP_RETURN, 0x02, 0xAB];
        let parts = decode_script(&bytes).expect("should decode");
        assert_eq!(parts.len(), 1);
        assert_eq!(parts[0].op, OP_RETURN);
        // The attached data starts at the OP_RETURN byte itself.
        assert_eq!(parts[0].data.as_deref(), Some(&bytes[..]));
    }

    /// Verify that OP_RETURN inside a conditional block is a plain opcode.
    #[test]
    fn test_decode_script_op_return_inside_conditional() {
        let parts = decode_script(&[OP_IF, OP_RETURN, OP_ENDIF, OP_DUP]).expect("should decode");
        assert_eq!(parts.len(), 4);
        assert_eq!(parts[1].op, OP_RETURN);
        assert!(parts.iter().all(|p| p.data.is_none()));
    }

    // -----------------------------------------------------------------------
    // decode_script - error / truncation cases
    // -----------------------------------------------------------------------

    /// Verify that a truncated direct-push script returns DataTooSmall.
    #[test]
    fn test_decode_script_bad_parts() {
        // 0x05 says "push 5 bytes" but only 3 bytes follow
        let bytes = hex::decode("05000000").expect("valid hex");
        let result = decode_script(&bytes);
        assert!(matches!(result, Err(ScriptError::DataTooSmall)));
    }

    /// Verify that a truncated OP_PUSHDATA1 script returns DataTooSmall.
    #[test]
    fn test_decode_script_invalid_pushdata1() {
        // OP_PUSHDATA1 = 0x4c, length byte claims 5 bytes but only 3 follow it
        let bytes = hex::decode("4c05000000").expect("valid hex");
        let result = decode_script(&bytes);
        assert!(matches!(result, Err(ScriptError::DataTooSmall)));
    }

    /// Verify OP_PUSHDATA1 with a valid data payload decodes correctly.
    #[test]
    fn test_decode_script_pushdata1_valid() {
        let data = b"testing";
        let mut script_bytes = vec![OP_PUSHDATA1, data.len() as u8];
        script_bytes.extend_from_slice(data);
        let parts = decode_script(&script_bytes).expect("should decode");
        assert_eq!(parts.len(), 1);
        assert_eq!(parts[0].op, OP_PUSHDATA1);
        assert_eq!(parts[0].data.as_ref().unwrap(), data);
    }

    /// Verify OP_PUSHDATA1 with a zero length yields an empty payload.
    #[test]
    fn test_decode_script_pushdata1_empty_payload() {
        let parts = decode_script(&[OP_PUSHDATA1, 0x00]).expect("should decode");
        assert_eq!(parts.len(), 1);
        assert_eq!(parts[0].op, OP_PUSHDATA1);
        assert_eq!(parts[0].data.as_deref(), Some(&[][..]));
    }

    /// Verify OP_PUSHDATA1 alone (no length byte) returns DataTooSmall.
    #[test]
    fn test_decode_script_pushdata1_missing_payload() {
        let result = decode_script(&[OP_PUSHDATA1]);
        assert!(matches!(result, Err(ScriptError::DataTooSmall)));
    }

    /// Verify OP_PUSHDATA2 alone returns DataTooSmall.
    #[test]
    fn test_decode_script_pushdata2_missing_payload() {
        let result = decode_script(&[OP_PUSHDATA2]);
        assert!(matches!(result, Err(ScriptError::DataTooSmall)));
    }

    /// Verify OP_PUSHDATA2 with insufficient data returns DataTooSmall.
    #[test]
    fn test_decode_script_pushdata2_too_small() {
        let data = b"testing PUSHDATA2";
        let mut script_bytes = vec![OP_PUSHDATA2, data.len() as u8];
        script_bytes.extend_from_slice(data);
        // Only 1 length byte instead of 2 -- the first payload byte is read as
        // the high length byte, so the claimed length exceeds what follows
        let result = decode_script(&script_bytes);
        assert!(matches!(result, Err(ScriptError::DataTooSmall)));
    }

    /// Verify OP_PUSHDATA4 alone returns DataTooSmall.
    #[test]
    fn test_decode_script_pushdata4_missing_payload() {
        let result = decode_script(&[OP_PUSHDATA4]);
        assert!(matches!(result, Err(ScriptError::DataTooSmall)));
    }

    /// Verify OP_PUSHDATA4 with insufficient data returns DataTooSmall.
    #[test]
    fn test_decode_script_pushdata4_too_small() {
        let data = b"testing PUSHDATA4";
        let mut script_bytes = vec![OP_PUSHDATA4, data.len() as u8];
        script_bytes.extend_from_slice(data);
        // Only 1 length byte instead of 4 -- will fail
        let result = decode_script(&script_bytes);
        assert!(matches!(result, Err(ScriptError::DataTooSmall)));
    }

    /// Verify OP_PUSHDATA4 claiming the maximum length returns DataTooSmall.
    #[test]
    fn test_decode_script_pushdata4_max_length() {
        let result = decode_script(&[OP_PUSHDATA4, 0xFF, 0xFF, 0xFF, 0xFF]);
        assert!(matches!(result, Err(ScriptError::DataTooSmall)));
    }

    // -----------------------------------------------------------------------
    // push_data_prefix boundary tests
    // -----------------------------------------------------------------------

    /// Verify push_data_prefix returns a single zero byte for empty data.
    #[test]
    fn test_push_data_prefix_zero() {
        let prefix = push_data_prefix(0).expect("should succeed");
        assert_eq!(prefix, vec![0u8]);
    }

    /// Verify push_data_prefix returns a 1-byte prefix for data <= 75 bytes.
    #[test]
    fn test_push_data_prefix_small() {
        let prefix = push_data_prefix(20).expect("should succeed");
        assert_eq!(prefix, vec![20u8]);
    }

    /// Verify push_data_prefix returns a 1-byte prefix at the 75-byte boundary.
    #[test]
    fn test_push_data_prefix_75() {
        let prefix = push_data_prefix(75).expect("should succeed");
        assert_eq!(prefix, vec![75u8]);
    }

    /// Verify push_data_prefix returns OP_PUSHDATA1 prefix for 76..=255 bytes.
    #[test]
    fn test_push_data_prefix_pushdata1() {
        let prefix = push_data_prefix(76).expect("should succeed");
        assert_eq!(prefix, vec![OP_PUSHDATA1, 76]);
    }

    /// Verify push_data_prefix returns OP_PUSHDATA1 prefix at the 255-byte boundary.
    #[test]
    fn test_push_data_prefix_255() {
        let prefix = push_data_prefix(255).expect("should succeed");
        assert_eq!(prefix, vec![OP_PUSHDATA1, 255]);
    }

    /// Verify push_data_prefix returns OP_PUSHDATA2 prefix for 256..=65535 bytes.
    #[test]
    fn test_push_data_prefix_pushdata2() {
        let prefix = push_data_prefix(256).expect("should succeed");
        assert_eq!(prefix, vec![OP_PUSHDATA2, 0x00, 0x01]);
    }

    /// Verify push_data_prefix returns OP_PUSHDATA2 prefix at the 65535-byte boundary.
    #[test]
    fn test_push_data_prefix_65535() {
        let prefix = push_data_prefix(65535).expect("should succeed");
        assert_eq!(prefix, vec![OP_PUSHDATA2, 0xFF, 0xFF]);
    }

    /// Verify push_data_prefix returns OP_PUSHDATA4 prefix for 65536+ bytes.
    #[test]
    fn test_push_data_prefix_pushdata4() {
        let prefix = push_data_prefix(65536).expect("should succeed");
        assert_eq!(prefix, vec![OP_PUSHDATA4, 0x00, 0x00, 0x01, 0x00]);
    }

    /// Verify push_data_prefix returns OP_PUSHDATA4 prefix at the 0xFFFFFFFF boundary.
    #[test]
    fn test_push_data_prefix_u32_max() {
        let prefix = push_data_prefix(0xFFFF_FFFF).expect("should succeed");
        assert_eq!(prefix, vec![OP_PUSHDATA4, 0xFF, 0xFF, 0xFF, 0xFF]);
    }

    /// Verify push_data_prefix rejects lengths that do not fit in 4 bytes.
    ///
    /// Only meaningful where `usize` is wider than 32 bits.
    #[cfg(target_pointer_width = "64")]
    #[test]
    fn test_push_data_prefix_too_big() {
        let result = push_data_prefix(0x1_0000_0000);
        assert!(matches!(result, Err(ScriptError::DataTooBig)));
    }

    // -----------------------------------------------------------------------
    // encode_push_datas
    // -----------------------------------------------------------------------

    /// Verify encode_push_datas concatenates multiple pushes correctly.
    #[test]
    fn test_encode_push_datas_multiple() {
        let parts: Vec<&[u8]> = vec![b"hello", b"world"];
        let encoded = encode_push_datas(&parts).expect("should encode");
        // "hello" is 5 bytes -> prefix 0x05, "world" is 5 bytes -> prefix 0x05
        let expected = hex::decode("0568656c6c6f05776f726c64").expect("valid hex");
        assert_eq!(encoded, expected);
    }

    /// Verify encode_push_datas with an empty parts list returns empty bytes.
    #[test]
    fn test_encode_push_datas_empty() {
        let parts: Vec<&[u8]> = vec![];
        let encoded = encode_push_datas(&parts).expect("should encode");
        assert!(encoded.is_empty());
    }

    /// Verify encode_push_datas switches to OP_PUSHDATA1 for a 76-byte part.
    #[test]
    fn test_encode_push_datas_pushdata1_part() {
        let payload = [0x11u8; 76];
        let parts: Vec<&[u8]> = vec![&payload];
        let encoded = encode_push_datas(&parts).expect("should encode");
        assert_eq!(encoded.len(), 2 + payload.len());
        assert_eq!(&encoded[..2], &[OP_PUSHDATA1, 76]);
        assert_eq!(&encoded[2..], &payload[..]);
    }

    // -----------------------------------------------------------------------
    // ScriptChunk::to_asm_string
    // -----------------------------------------------------------------------

    /// Verify that a data-push chunk renders as hex in ASM output.
    #[test]
    fn test_chunk_to_asm_string_data() {
        let chunk = ScriptChunk {
            op: OP_DATA_20,
            data: Some(vec![0xAB; 20]),
        };
        let asm = chunk.to_asm_string();
        assert_eq!(asm, "ab".repeat(20));
    }

    /// Verify that a non-push opcode chunk renders as its OP_xxx name.
    #[test]
    fn test_chunk_to_asm_string_opcode() {
        let chunk = ScriptChunk {
            op: OP_DUP,
            data: None,
        };
        assert_eq!(chunk.to_asm_string(), "OP_DUP");
    }
}