mini_bitcoin_script/
tokenizer.rs1use crate::error::ScriptError;
2use crate::hex::decode_hex;
3use crate::opcode::Opcode;
4use crate::token::Token;
5
6pub fn parse_script(bytes: &[u8]) -> Result<Vec<Token>, ScriptError> {
19 let mut tokens = Vec::new();
20 let mut pos = 0;
21 let len = bytes.len();
22
23 while pos < len {
24 let byte = bytes[pos];
25 pos += 1;
26
27 match byte {
28 0x01..=0x4b => {
30 let n = byte as usize;
31 if pos + n > len {
32 return Err(ScriptError::UnexpectedEndOfScript);
33 }
34 tokens.push(Token::PushData(bytes[pos..pos + n].to_vec()));
35 pos += n;
36 }
37
38 0x4c => {
40 if pos >= len {
41 return Err(ScriptError::UnexpectedEndOfScript);
42 }
43 let n = bytes[pos] as usize;
44 pos += 1;
45 if pos + n > len {
46 return Err(ScriptError::UnexpectedEndOfScript);
47 }
48 tokens.push(Token::PushData(bytes[pos..pos + n].to_vec()));
49 pos += n;
50 }
51
52 0x4d => {
54 if pos + 2 > len {
55 return Err(ScriptError::UnexpectedEndOfScript);
56 }
57 let n = u16::from_le_bytes([bytes[pos], bytes[pos + 1]]) as usize;
58 pos += 2;
59 if pos + n > len {
60 return Err(ScriptError::UnexpectedEndOfScript);
61 }
62 tokens.push(Token::PushData(bytes[pos..pos + n].to_vec()));
63 pos += n;
64 }
65
66 0x4e => {
68 if pos + 4 > len {
69 return Err(ScriptError::UnexpectedEndOfScript);
70 }
71 let n = u32::from_le_bytes([
72 bytes[pos],
73 bytes[pos + 1],
74 bytes[pos + 2],
75 bytes[pos + 3],
76 ]) as usize;
77 pos += 4;
78 if pos + n > len {
79 return Err(ScriptError::UnexpectedEndOfScript);
80 }
81 tokens.push(Token::PushData(bytes[pos..pos + n].to_vec()));
82 pos += n;
83 }
84
85 _ => match Opcode::from_byte(byte) {
87 Some(opcode) => tokens.push(Token::Op(opcode)),
88 None => return Err(ScriptError::UnsupportedOpcode(byte)),
89 },
90 }
91 }
92
93 Ok(tokens)
94}
95
96pub fn parse_script_hex(hex: &str) -> Result<Vec<Token>, ScriptError> {
101 let bytes = decode_hex(hex)?;
102 parse_script(&bytes)
103}
104
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for a push token carrying `data`.
    fn push(data: &[u8]) -> Token {
        Token::PushData(data.to_vec())
    }

    /// Asserts that parsing `script` fails with `UnexpectedEndOfScript`.
    fn assert_truncated(script: &[u8]) {
        assert!(matches!(
            parse_script(script),
            Err(ScriptError::UnexpectedEndOfScript)
        ));
    }

    // An empty script yields no tokens.
    #[test]
    fn empty_script() {
        let parsed = parse_script(&[]).unwrap();
        assert_eq!(parsed.len(), 0);
    }

    // A lone opcode byte becomes a single `Op` token.
    #[test]
    fn single_opcode() {
        let parsed = parse_script(&[0x76]).unwrap();
        assert_eq!(parsed, vec![Token::Op(Opcode::OpDup)]);
    }

    // Direct push: the leading byte is the payload length.
    #[test]
    fn direct_push_3_bytes() {
        let parsed = parse_script(&[0x03, 0xaa, 0xbb, 0xcc]).unwrap();
        assert_eq!(parsed, vec![push(&[0xaa, 0xbb, 0xcc])]);
    }

    // Direct push announcing more bytes than are present.
    #[test]
    fn direct_push_truncated() {
        assert_truncated(&[0x03, 0xaa, 0xbb]);
    }

    // 0x4c: one-byte length prefix.
    #[test]
    fn pushdata1() {
        let parsed = parse_script(&[0x4c, 0x02, 0xde, 0xad]).unwrap();
        assert_eq!(parsed, vec![push(&[0xde, 0xad])]);
    }

    // 0x4c with no length byte after it.
    #[test]
    fn pushdata1_missing_length() {
        assert_truncated(&[0x4c]);
    }

    // 0x4c whose declared length exceeds the remaining bytes.
    #[test]
    fn pushdata1_truncated_data() {
        assert_truncated(&[0x4c, 0x05, 0x01, 0x02]);
    }

    // 0x4d: two-byte little-endian length prefix.
    #[test]
    fn pushdata2() {
        let parsed = parse_script(&[0x4d, 0x03, 0x00, 0xaa, 0xbb, 0xcc]).unwrap();
        assert_eq!(parsed, vec![push(&[0xaa, 0xbb, 0xcc])]);
    }

    // 0x4d with only half of its length prefix.
    #[test]
    fn pushdata2_missing_length() {
        assert_truncated(&[0x4d, 0x03]);
    }

    // 0x4e: four-byte little-endian length prefix.
    #[test]
    fn pushdata4() {
        let parsed =
            parse_script(&[0x4e, 0x03, 0x00, 0x00, 0x00, 0xaa, 0xbb, 0xcc]).unwrap();
        assert_eq!(parsed, vec![push(&[0xaa, 0xbb, 0xcc])]);
    }

    // 0x4e with an incomplete length prefix.
    #[test]
    fn pushdata4_missing_length() {
        assert_truncated(&[0x4e, 0x01, 0x00]);
    }

    // A byte the opcode table rejects is reported together with its value.
    #[test]
    fn unsupported_opcode() {
        let outcome = parse_script(&[0x50]);
        assert!(matches!(outcome, Err(ScriptError::UnsupportedOpcode(0x50))));
    }

    // 0x00 is handled as an opcode, not as a zero-length direct push.
    #[test]
    fn op0_parses() {
        let parsed = parse_script(&[0x00]).unwrap();
        assert_eq!(parsed, vec![Token::Op(Opcode::Op0)]);
    }

    // Two opcodes, a 20-byte push, then two more opcodes.
    #[test]
    fn multi_token_script() {
        let script = [&[0x76, 0xa9, 0x14][..], &[0xab; 20][..], &[0x88, 0xac][..]].concat();

        let parsed = parse_script(&script).unwrap();

        let expected = vec![
            Token::Op(Opcode::OpDup),
            Token::Op(Opcode::OpHash160),
            push(&[0xab; 20]),
            Token::Op(Opcode::OpEqualVerify),
            Token::Op(Opcode::OpCheckSig),
        ];
        assert_eq!(parsed, expected);
    }

    // The hex entry point produces the same token stream shape as the
    // byte-level parser for the equivalent script.
    #[test]
    fn parse_script_hex_roundtrip() {
        let hex = format!("76a914{}88ac", "ab".repeat(20));

        let parsed = parse_script_hex(&hex).unwrap();

        assert_eq!(parsed.len(), 5);
        assert_eq!(parsed.first(), Some(&Token::Op(Opcode::OpDup)));
        assert_eq!(parsed.last(), Some(&Token::Op(Opcode::OpCheckSig)));
    }

    // Non-hex characters surface as `InvalidHex`.
    #[test]
    fn parse_script_hex_invalid() {
        let outcome = parse_script_hex("zzzz");
        assert!(matches!(outcome, Err(ScriptError::InvalidHex)));
    }

    // A zero length prefix yields an empty push.
    #[test]
    fn pushdata1_zero_length() {
        let parsed = parse_script(&[0x4c, 0x00]).unwrap();
        assert_eq!(parsed, vec![push(&[])]);
    }

    // Smallest direct push.
    #[test]
    fn direct_push_1_byte() {
        let parsed = parse_script(&[0x01, 0xff]).unwrap();
        assert_eq!(parsed, vec![push(&[0xff])]);
    }
}