elements_miniscript/miniscript/
lex.rs

// Written in 2018 by Andrew Poelstra <apoelstra@wpsoftware.net>
// SPDX-License-Identifier: CC0-1.0

//! Lexer
//!
//! Translates a script into a sequence of tokens, which the parser then
//! consumes in reverse (see `TokenIter`)
//!
8
9use std::fmt;
10
11use elements::{opcodes, script};
12
13use super::Error;
14use crate::util::{build_scriptint, slice_to_u32_le};
/// Atom of a tokenized version of a script
///
/// Variants carrying `&'s [u8]` borrow their bytes from the script handed to
/// `lex`; variants carrying `Vec<u8>` or arrays own a copy of the pushed data.
#[derive(Debug, Clone, PartialEq, Eq)]
#[allow(missing_docs)]
pub enum Token<'s> {
    // Plain opcode tokens. Unless noted otherwise each is emitted by `lex`
    // for the single opcode of the same name.
    BoolAnd,
    BoolOr,
    Add,
    Sub,
    // Emitted for OP_EQUAL, and (followed by `Verify`) for OP_EQUALVERIFY
    Equal,
    // Emitted for OP_NUMEQUAL, and (followed by `Verify`) for OP_NUMEQUALVERIFY
    NumEqual,
    // Emitted for OP_CHECKSIG, and (followed by `Verify`) for OP_CHECKSIGVERIFY
    CheckSig,
    CheckSigFromStack,
    CheckSigAdd,
    // Unlike the other *VERIFY opcodes, OP_CHECKSIGFROMSTACKVERIFY is kept as
    // one token instead of being split into two
    CheckSigFromStackVerify,
    // Emitted for OP_CHECKMULTISIG, and (followed by `Verify`) for
    // OP_CHECKMULTISIGVERIFY
    CheckMultiSig,
    // OP_CSV
    CheckSequenceVerify,
    // OP_CLTV
    CheckLockTimeVerify,
    FromAltStack,
    ToAltStack,
    Left,
    Cat,
    // OP_CODESEPARATOR
    CodeSep,
    Over,
    Pick,
    Depth,
    Drop,
    Dup,
    If,
    IfDup,
    NotIf,
    Else,
    EndIf,
    // OP_0NOTEQUAL
    ZeroNotEqual,
    Size,
    Swap,
    // Emitted for OP_VERIFY and as the second token of the split *VERIFY opcodes
    Verify,
    Ripemd160,
    Hash160,
    Sha256,
    Hash256,
    // Non-negative number: OP_PUSHBYTES_0 (0), OP_PUSHNUM_1..=OP_PUSHNUM_16, or a
    // data push that `lex` decoded as a minimally encoded non-negative script int
    Num(u32),
    // Data push of exactly 20 bytes
    Hash20(&'s [u8]),
    // Data push of exactly 8 bytes
    Bytes8(&'s [u8]),
    // Data push of exactly 32 bytes
    Bytes32(&'s [u8]),
    // Data push of exactly 33 bytes
    Bytes33(&'s [u8]),
    // Data push of exactly 65 bytes
    Bytes65(&'s [u8]),
    // Generic data push. Produced for pushes that fit none of the more specific
    // tokens above, and for a Num/HashN/BytesN token that `lex` converts back
    // to raw bytes because it is followed by `Swap` and then OP_CAT
    Push(Vec<u8>),
    // Pick followed by a 4 byte push (decoded with `slice_to_u32_le`)
    PickPush4(u32),
    // Pick followed by a 32 byte push
    PickPush32([u8; 32]),
    // Pick followed by a push.
    // NOTE(review): the `lex` function in this file never constructs this variant
    PickPush(Vec<u8>),
    // OP_INSPECTINPUTVALUE
    InpValue,
    // OP_INSPECTOUTPUTVALUE
    OutValue,
    // OP_INSPECTINPUTISSUANCE
    InpIssue,
    // OP_LESSTHANOREQUAL64
    Leq64,
    // OP_LESSTHAN64 (strictly less than)
    Le64,
    // OP_GREATERTHANOREQUAL64
    Geq64,
    // OP_GREATERTHAN64 (strictly greater than)
    Ge64,
    Neg64,
    Div64,
    Mul64,
    Sub64,
    Add64,
    Nip,
    And,
    Or,
    Xor,
    Invert,
    // OP_PUSHCURRENTINPUTINDEX
    CurrInp,
    // OP_INSPECTINPUTASSET
    InpAsset,
    // OP_INSPECTOUTPUTASSET
    OutAsset,
    // OP_INSPECTOUTPUTSCRIPTPUBKEY
    OutSpk,
    // OP_INSPECTINPUTSCRIPTPUBKEY
    InpSpk,
    // OP_PUSHNUM_NEG1
    NumNeg1,
    ScriptNumToLe64,
    Le64ToScriptNum,
    // OP_2DUP
    Dup2,
}
92
93impl<'s> fmt::Display for Token<'s> {
94    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
95        match self {
96            Token::Num(n) => write!(f, "#{}", n),
97            Token::Hash20(b) | Token::Bytes33(b) | Token::Bytes32(b) | Token::Bytes65(b) => {
98                for ch in &b[..] {
99                    write!(f, "{:02x}", *ch)?;
100                }
101                Ok(())
102            }
103            x => write!(f, "{:?}", x),
104        }
105    }
106}
107
#[derive(Debug, Clone)]
/// Iterator that goes through a vector of tokens backward: the parser wants to
/// read the script from its end, and popping from the back of a `Vec` with
/// `Vec::pop()` is cheap.
// Design note: this does not strictly need to be an iterator. The lexed symbols
// are collected into a vector anyway, so callers could work on the inner vector
// directly.
pub struct TokenIter<'s>(Vec<Token<'s>>);
115
116impl<'s> TokenIter<'s> {
117    /// Create a new TokenIter
118    pub fn new(v: Vec<Token<'s>>) -> TokenIter<'s> {
119        TokenIter(v)
120    }
121
122    /// Look at the top at Iterator
123    pub fn peek(&self) -> Option<&'s Token<'_>> {
124        self.0.last()
125    }
126
127    /// Look at the slice with the last n elements
128    pub fn peek_slice(&self, n: usize) -> Option<&[Token<'_>]> {
129        if n <= self.len() {
130            Some(self.0[self.len() - n..].as_ref())
131        } else {
132            None
133        }
134    }
135
136    /// Advance the iterator n times
137    /// Returns Some(()) if the iterator can be advanced n times
138    pub fn advance(&mut self, n: usize) -> Option<()> {
139        if n <= self.len() {
140            for _ in 0..n {
141                self.next();
142            }
143            Some(())
144        } else {
145            None
146        }
147    }
148
149    /// Push a value to the iterator
150    /// This will be first value consumed by popun_
151    pub fn un_next(&mut self, tok: Token<'s>) {
152        self.0.push(tok)
153    }
154
155    /// The len of the iterator
156    pub fn len(&self) -> usize {
157        self.0.len()
158    }
159
160    /// Check if the iterator is empty
161    pub fn is_empty(&self) -> bool {
162        self.0.is_empty()
163    }
164
165    /// Get the inner mutable vector
166    pub fn as_inner_mut(&mut self) -> &mut Vec<Token<'s>> {
167        &mut self.0
168    }
169}
170
171impl<'s> Iterator for TokenIter<'s> {
172    type Item = Token<'s>;
173
174    fn next(&mut self) -> Option<Token<'s>> {
175        self.0.pop()
176    }
177}
178
179/// Tokenize a script
180pub fn lex(script: &script::Script) -> Result<Vec<Token<'_>>, Error> {
181    let mut ret = Vec::with_capacity(script.len());
182
183    fn process_candidate_push(ret: &mut [Token<'_>]) -> Result<(), Error> {
184        let ret_len = ret.len();
185
186        if ret_len < 2 || ret[ret_len - 1] != Token::Swap {
187            return Ok(());
188        }
189        let token = match &ret[ret_len - 2] {
190            Token::Hash20(x) => Token::Push(x.to_vec()),
191            Token::Bytes32(x) | Token::Bytes33(x) | Token::Bytes65(x) => Token::Push(x.to_vec()),
192            Token::Num(k) => Token::Push(build_scriptint(*k as i64)),
193            _x => return Ok(()), // no change required
194        };
195        ret[ret_len - 2] = token;
196        Ok(())
197    }
198
199    for ins in script.instructions_minimal() {
200        match ins.map_err(Error::Script)? {
201            script::Instruction::Op(opcodes::all::OP_BOOLAND) => {
202                ret.push(Token::BoolAnd);
203            }
204            script::Instruction::Op(opcodes::all::OP_BOOLOR) => {
205                ret.push(Token::BoolOr);
206            }
207            script::Instruction::Op(opcodes::all::OP_EQUAL) => {
208                ret.push(Token::Equal);
209            }
210            script::Instruction::Op(opcodes::all::OP_EQUALVERIFY) => {
211                ret.push(Token::Equal);
212                ret.push(Token::Verify);
213            }
214            script::Instruction::Op(opcodes::all::OP_NUMEQUAL) => {
215                ret.push(Token::NumEqual);
216            }
217            script::Instruction::Op(opcodes::all::OP_NUMEQUALVERIFY) => {
218                ret.push(Token::NumEqual);
219                ret.push(Token::Verify);
220            }
221            script::Instruction::Op(opcodes::all::OP_CHECKSIG) => {
222                ret.push(Token::CheckSig);
223            }
224            script::Instruction::Op(opcodes::all::OP_CHECKSIGFROMSTACK) => {
225                ret.push(Token::CheckSigFromStack);
226            }
227            script::Instruction::Op(opcodes::all::OP_CHECKSIGFROMSTACKVERIFY) => {
228                ret.push(Token::CheckSigFromStackVerify);
229            }
230            script::Instruction::Op(opcodes::all::OP_CHECKSIGVERIFY) => {
231                ret.push(Token::CheckSig);
232                ret.push(Token::Verify);
233            }
234            script::Instruction::Op(opcodes::all::OP_CHECKSIGADD) => {
235                ret.push(Token::CheckSigAdd);
236            }
237            script::Instruction::Op(opcodes::all::OP_INSPECTINPUTVALUE) => {
238                ret.push(Token::InpValue);
239            }
240            script::Instruction::Op(opcodes::all::OP_INSPECTOUTPUTVALUE) => {
241                ret.push(Token::OutValue);
242            }
243            script::Instruction::Op(opcodes::all::OP_INSPECTINPUTASSET) => {
244                ret.push(Token::InpAsset);
245            }
246            script::Instruction::Op(opcodes::all::OP_INSPECTOUTPUTASSET) => {
247                ret.push(Token::OutAsset);
248            }
249            script::Instruction::Op(opcodes::all::OP_INSPECTINPUTSCRIPTPUBKEY) => {
250                ret.push(Token::InpSpk);
251            }
252            script::Instruction::Op(opcodes::all::OP_INSPECTOUTPUTSCRIPTPUBKEY) => {
253                ret.push(Token::OutSpk);
254            }
255            script::Instruction::Op(opcodes::all::OP_INSPECTINPUTISSUANCE) => {
256                ret.push(Token::InpIssue);
257            }
258            script::Instruction::Op(opcodes::all::OP_PUSHCURRENTINPUTINDEX) => {
259                ret.push(Token::CurrInp);
260            }
261            script::Instruction::Op(opcodes::all::OP_ADD64) => {
262                ret.push(Token::Add64);
263            }
264            script::Instruction::Op(opcodes::all::OP_SUB64) => {
265                ret.push(Token::Sub64);
266            }
267            script::Instruction::Op(opcodes::all::OP_MUL64) => {
268                ret.push(Token::Mul64);
269            }
270            script::Instruction::Op(opcodes::all::OP_DIV64) => {
271                ret.push(Token::Div64);
272            }
273            script::Instruction::Op(opcodes::all::OP_NEG64) => {
274                ret.push(Token::Neg64);
275            }
276            script::Instruction::Op(opcodes::all::OP_GREATERTHAN64) => {
277                ret.push(Token::Ge64);
278            }
279            script::Instruction::Op(opcodes::all::OP_GREATERTHANOREQUAL64) => {
280                ret.push(Token::Geq64);
281            }
282            script::Instruction::Op(opcodes::all::OP_LESSTHAN64) => {
283                ret.push(Token::Le64);
284            }
285            script::Instruction::Op(opcodes::all::OP_LESSTHANOREQUAL64) => {
286                ret.push(Token::Leq64);
287            }
288            script::Instruction::Op(opcodes::all::OP_AND) => {
289                ret.push(Token::And);
290            }
291            script::Instruction::Op(opcodes::all::OP_OR) => {
292                ret.push(Token::Or);
293            }
294            script::Instruction::Op(opcodes::all::OP_XOR) => {
295                ret.push(Token::Xor);
296            }
297            script::Instruction::Op(opcodes::all::OP_INVERT) => {
298                ret.push(Token::Invert);
299            }
300            script::Instruction::Op(opcodes::all::OP_SCRIPTNUMTOLE64) => {
301                ret.push(Token::ScriptNumToLe64);
302            }
303            script::Instruction::Op(opcodes::all::OP_LE64TOSCRIPTNUM) => {
304                ret.push(Token::Le64ToScriptNum);
305            }
306            script::Instruction::Op(opcodes::all::OP_CHECKMULTISIG) => {
307                ret.push(Token::CheckMultiSig);
308            }
309            script::Instruction::Op(opcodes::all::OP_CHECKMULTISIGVERIFY) => {
310                ret.push(Token::CheckMultiSig);
311                ret.push(Token::Verify);
312            }
313            script::Instruction::Op(op) if op == opcodes::all::OP_CSV => {
314                ret.push(Token::CheckSequenceVerify);
315            }
316            script::Instruction::Op(op) if op == opcodes::all::OP_CLTV => {
317                ret.push(Token::CheckLockTimeVerify);
318            }
319            script::Instruction::Op(opcodes::all::OP_FROMALTSTACK) => {
320                ret.push(Token::FromAltStack);
321            }
322            script::Instruction::Op(opcodes::all::OP_TOALTSTACK) => {
323                ret.push(Token::ToAltStack);
324            }
325            script::Instruction::Op(opcodes::all::OP_LEFT) => {
326                ret.push(Token::Left);
327            }
328            script::Instruction::Op(opcodes::all::OP_2DUP) => {
329                ret.push(Token::Dup2);
330            }
331            script::Instruction::Op(opcodes::all::OP_CAT) => {
332                process_candidate_push(&mut ret)?;
333                ret.push(Token::Cat);
334            }
335            script::Instruction::Op(opcodes::all::OP_CODESEPARATOR) => {
336                ret.push(Token::CodeSep);
337            }
338            script::Instruction::Op(opcodes::all::OP_OVER) => {
339                ret.push(Token::Over);
340            }
341            script::Instruction::Op(opcodes::all::OP_NIP) => {
342                ret.push(Token::Nip);
343            }
344            script::Instruction::Op(opcodes::all::OP_PICK) => {
345                ret.push(Token::Pick);
346            }
347            script::Instruction::Op(opcodes::all::OP_DROP) => {
348                ret.push(Token::Drop);
349            }
350            script::Instruction::Op(opcodes::all::OP_DEPTH) => {
351                ret.push(Token::Depth);
352            }
353            script::Instruction::Op(opcodes::all::OP_DUP) => {
354                ret.push(Token::Dup);
355            }
356            script::Instruction::Op(opcodes::all::OP_ADD) => {
357                ret.push(Token::Add);
358            }
359            script::Instruction::Op(opcodes::all::OP_SUB) => {
360                ret.push(Token::Sub);
361            }
362            script::Instruction::Op(opcodes::all::OP_IF) => {
363                ret.push(Token::If);
364            }
365            script::Instruction::Op(opcodes::all::OP_IFDUP) => {
366                ret.push(Token::IfDup);
367            }
368            script::Instruction::Op(opcodes::all::OP_NOTIF) => {
369                ret.push(Token::NotIf);
370            }
371            script::Instruction::Op(opcodes::all::OP_ELSE) => {
372                ret.push(Token::Else);
373            }
374            script::Instruction::Op(opcodes::all::OP_ENDIF) => {
375                ret.push(Token::EndIf);
376            }
377            script::Instruction::Op(opcodes::all::OP_0NOTEQUAL) => {
378                ret.push(Token::ZeroNotEqual);
379            }
380            script::Instruction::Op(opcodes::all::OP_SIZE) => {
381                ret.push(Token::Size);
382            }
383            script::Instruction::Op(opcodes::all::OP_SWAP) => {
384                ret.push(Token::Swap);
385            }
386            script::Instruction::Op(opcodes::all::OP_VERIFY) => {
387                match ret.last() {
388                    Some(op @ &Token::Equal)
389                    | Some(op @ &Token::CheckSig)
390                    | Some(op @ &Token::CheckMultiSig) => {
391                        return Err(Error::NonMinimalVerify(format!("{:?}", op)))
392                    }
393                    _ => {}
394                }
395                ret.push(Token::Verify);
396            }
397            script::Instruction::Op(opcodes::all::OP_RIPEMD160) => {
398                ret.push(Token::Ripemd160);
399            }
400            script::Instruction::Op(opcodes::all::OP_HASH160) => {
401                ret.push(Token::Hash160);
402            }
403            script::Instruction::Op(opcodes::all::OP_SHA256) => {
404                ret.push(Token::Sha256);
405            }
406            script::Instruction::Op(opcodes::all::OP_HASH256) => {
407                ret.push(Token::Hash256);
408            }
409            script::Instruction::PushBytes(bytes) => {
410                // Check for Pick Push
411                // Special handling of tokens for Covenants
412                // To determine whether some Token is actually
413                // 4 bytes push or a script int of 4 bytes,
414                // we need additional script context
415                if ret.last() == Some(&Token::Pick) {
416                    ret.pop().unwrap();
417                    match bytes.len() {
418                        // All other sighash elements are 32 bytes. And the script code
419                        // is 24 bytes
420                        4 => ret.push(Token::PickPush4(slice_to_u32_le(bytes))),
421                        32 => {
422                            let mut x = [0u8; 32];
423                            x.copy_from_slice(bytes);
424                            ret.push(Token::PickPush32(x));
425                        }
426                        // Other pushes should be err. This will change
427                        // once we add script introspection
428                        _ => return Err(Error::InvalidPush(bytes.to_owned())),
429                    }
430                } else {
431                    // Create the most specific type possible out of the
432                    // Push. When we later encounter CAT, revisit and
433                    // reconvert these to pushes.
434                    // See [process_candidate_push]
435                    match bytes.len() {
436                        8 => ret.push(Token::Bytes8(bytes)),
437                        20 => ret.push(Token::Hash20(bytes)),
438                        32 => ret.push(Token::Bytes32(bytes)),
439                        33 => ret.push(Token::Bytes33(bytes)),
440                        65 => ret.push(Token::Bytes65(bytes)),
441                        _ => {
442                            match script::read_scriptint(bytes) {
443                                Ok(v) if v >= 0 => {
444                                    // check minimality of the number
445                                    if &script::Builder::new().push_int(v).into_script()[1..]
446                                        != bytes
447                                    {
448                                        return Err(Error::InvalidPush(bytes.to_owned()));
449                                    }
450                                    ret.push(Token::Num(v as u32));
451                                }
452                                _ => ret.push(Token::Push(bytes.to_owned())),
453                            }
454                        }
455                    }
456                }
457            }
458            script::Instruction::Op(opcodes::all::OP_PUSHNUM_NEG1) => {
459                ret.push(Token::NumNeg1);
460            }
461            script::Instruction::Op(opcodes::all::OP_PUSHBYTES_0) => {
462                ret.push(Token::Num(0));
463            }
464            script::Instruction::Op(opcodes::all::OP_PUSHNUM_1) => {
465                ret.push(Token::Num(1));
466            }
467            script::Instruction::Op(opcodes::all::OP_PUSHNUM_2) => {
468                ret.push(Token::Num(2));
469            }
470            script::Instruction::Op(opcodes::all::OP_PUSHNUM_3) => {
471                ret.push(Token::Num(3));
472            }
473            script::Instruction::Op(opcodes::all::OP_PUSHNUM_4) => {
474                ret.push(Token::Num(4));
475            }
476            script::Instruction::Op(opcodes::all::OP_PUSHNUM_5) => {
477                ret.push(Token::Num(5));
478            }
479            script::Instruction::Op(opcodes::all::OP_PUSHNUM_6) => {
480                ret.push(Token::Num(6));
481            }
482            script::Instruction::Op(opcodes::all::OP_PUSHNUM_7) => {
483                ret.push(Token::Num(7));
484            }
485            script::Instruction::Op(opcodes::all::OP_PUSHNUM_8) => {
486                ret.push(Token::Num(8));
487            }
488            script::Instruction::Op(opcodes::all::OP_PUSHNUM_9) => {
489                ret.push(Token::Num(9));
490            }
491            script::Instruction::Op(opcodes::all::OP_PUSHNUM_10) => {
492                ret.push(Token::Num(10));
493            }
494            script::Instruction::Op(opcodes::all::OP_PUSHNUM_11) => {
495                ret.push(Token::Num(11));
496            }
497            script::Instruction::Op(opcodes::all::OP_PUSHNUM_12) => {
498                ret.push(Token::Num(12));
499            }
500            script::Instruction::Op(opcodes::all::OP_PUSHNUM_13) => {
501                ret.push(Token::Num(13));
502            }
503            script::Instruction::Op(opcodes::all::OP_PUSHNUM_14) => {
504                ret.push(Token::Num(14));
505            }
506            script::Instruction::Op(opcodes::all::OP_PUSHNUM_15) => {
507                ret.push(Token::Num(15));
508            }
509            script::Instruction::Op(opcodes::all::OP_PUSHNUM_16) => {
510                ret.push(Token::Num(16));
511            }
512            script::Instruction::Op(op) => return Err(Error::InvalidOpcode(op)),
513        };
514    }
515    Ok(ret)
516}