bitcoin/util/
misc.rs

1// Rust Bitcoin Library
2// Written in 2014 by
3//     Andrew Poelstra <apoelstra@wpsoftware.net>
4//
5// To the extent possible under law, the author(s) have dedicated all
6// copyright and related and neighboring rights to this software to
7// the public domain worldwide. This software is distributed without
8// any warranty.
9//
10// You should have received a copy of the CC0 Public Domain Dedication
11// along with this software.
12// If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
13//
14
15//! Miscellaneous functions
16//!
17//! Various utility functions
18
19use blockdata::opcodes;
20use consensus::encode;
21
22/// Helper function to convert hex nibble characters to their respective value
23#[inline]
24fn hex_val(c: u8) -> Result<u8, encode::Error> {
25    let res = match c {
26        b'0' ... b'9' => c - '0' as u8,
27        b'a' ... b'f' => c - 'a' as u8 + 10,
28        b'A' ... b'F' => c - 'A' as u8 + 10,
29        _ => return Err(encode::Error::UnexpectedHexDigit(c as char)),
30    };
31    Ok(res)
32}
33
34/// Convert a hexadecimal-encoded string to its corresponding bytes
35pub fn hex_bytes(data: &str) -> Result<Vec<u8>, encode::Error> {
36    // This code is optimized to be as fast as possible without using unsafe or platform specific
37    // features. If you want to refactor it please make sure you don't introduce performance
38    // regressions (run the benchmark with `cargo bench --features unstable`).
39
40    // If the hex string has an uneven length fail early
41    if data.len() % 2 != 0 {
42        return Err(encode::Error::ParseFailed("hexstring of odd length"));
43    }
44
45    // Preallocate the uninitialized memory for the byte array
46    let mut res = Vec::with_capacity(data.len() / 2);
47
48    let mut hex_it = data.bytes();
49    loop {
50        // Get most significant nibble of current byte or end iteration
51        let msn = match hex_it.next() {
52            None => break,
53            Some(x) => x,
54        };
55
56        // Get least significant nibble of current byte
57        let lsn = match hex_it.next() {
58            None => unreachable!("len % 2 == 0"),
59            Some(x) => x,
60        };
61
62        // Convert bytes representing characters to their represented value and combine lsn and msn.
63        // The and_then and map are crucial for performance, in comparison to using ? and then
64        // using the results of that for the calculation it's nearly twice as fast. Using bit
65        // shifting and or instead of multiply and add on the other hand doesn't show a significant
66        // increase in performance.
67        match hex_val(msn).and_then(|msn_val| hex_val(lsn).map(|lsn_val| msn_val * 16 + lsn_val)) {
68            Ok(x) => res.push(x),
69            Err(e) => return Err(e),
70        }
71    }
72    Ok(res)
73}
74
75/// Search for `needle` in the vector `haystack` and remove every
76/// instance of it, returning the number of instances removed.
77/// Loops through the vector opcode by opcode, skipping pushed data.
78pub fn script_find_and_remove(haystack: &mut Vec<u8>, needle: &[u8]) -> usize {
79    if needle.len() > haystack.len() { return 0; }
80    if needle.len() == 0 { return 0; }
81
82    let mut top = haystack.len() - needle.len();
83    let mut n_deleted = 0;
84
85    let mut i = 0;
86    while i <= top {
87        if &haystack[i..(i + needle.len())] == needle {
88            for j in i..top {
89                haystack.swap(j + needle.len(), j);
90            }
91            n_deleted += 1;
92            // This is ugly but prevents infinite loop in case of overflow
93            let overflow = top < needle.len();
94            top = top.wrapping_sub(needle.len());
95            if overflow { break; }
96        } else {
97            i += match opcodes::All::from((*haystack)[i]).classify() {
98                opcodes::Class::PushBytes(n) => n as usize + 1,
99                opcodes::Class::Ordinary(opcodes::Ordinary::OP_PUSHDATA1) => 2,
100                opcodes::Class::Ordinary(opcodes::Ordinary::OP_PUSHDATA2) => 3,
101                opcodes::Class::Ordinary(opcodes::Ordinary::OP_PUSHDATA4) => 5,
102                _ => 1
103            };
104        }
105    }
106    haystack.truncate(top.wrapping_add(needle.len()));
107    n_deleted
108}
109
110#[cfg(all(test, feature="unstable"))]
111mod benches {
112    use rand::{Rng, thread_rng};
113    use super::hex_bytes;
114    use test::Bencher;
115
116    fn join<I: Iterator<Item=IT>, IT: AsRef<str>>(iter: I, expected_len: usize) -> String {
117        let mut res = String::with_capacity(expected_len);
118        for s in iter {
119            res.push_str(s.as_ref());
120        }
121        res
122    }
123
124    fn bench_from_hex(b: &mut Bencher, data_size: usize) {
125        let data_bytes = thread_rng()
126            .gen_iter()
127            .take(data_size)
128            .collect::<Vec<u8>>();
129        let data = join(data_bytes.iter().map(|x| format!("{:02x}", x)), data_size * 2);
130
131        assert_eq!(hex_bytes(&data).unwrap(), data_bytes);
132
133        b.iter(|| {
134            hex_bytes(&data).unwrap()
135        })
136    }
137
138    #[bench]
139    fn from_hex_16_bytes(b: &mut Bencher) {
140        bench_from_hex(b, 16);
141    }
142
143    #[bench]
144    fn from_hex_64_bytes(b: &mut Bencher) {
145        bench_from_hex(b, 64);
146    }
147
148    #[bench]
149    fn from_hex_256_bytes(b: &mut Bencher) {
150        bench_from_hex(b, 256);
151    }
152
153    #[bench]
154    fn from_hex_4m_bytes(b: &mut Bencher) {
155        bench_from_hex(b, 1024 * 1024 * 4);
156    }
157}
158
159#[cfg(test)]
160mod tests {
161    use super::script_find_and_remove;
162    use super::hex_bytes;
163
164    #[test]
165    fn test_script_find_and_remove() {
166        let mut v = vec![101u8, 102, 103, 104, 102, 103, 104, 102, 103, 104, 105, 106, 107, 108, 109];
167
168        assert_eq!(script_find_and_remove(&mut v, &[]), 0);
169        assert_eq!(script_find_and_remove(&mut v, &[105, 105, 105]), 0);
170        assert_eq!(v, vec![101, 102, 103, 104, 102, 103, 104, 102, 103, 104, 105, 106, 107, 108, 109]);
171
172        assert_eq!(script_find_and_remove(&mut v, &[105, 106, 107]), 1);
173        assert_eq!(v, vec![101, 102, 103, 104, 102, 103, 104, 102, 103, 104, 108, 109]);
174
175        assert_eq!(script_find_and_remove(&mut v, &[104, 108, 109]), 1);
176        assert_eq!(v, vec![101, 102, 103, 104, 102, 103, 104, 102, 103]);
177
178        assert_eq!(script_find_and_remove(&mut v, &[101]), 1);
179        assert_eq!(v, vec![102, 103, 104, 102, 103, 104, 102, 103]);
180
181        assert_eq!(script_find_and_remove(&mut v, &[102]), 3);
182        assert_eq!(v, vec![103, 104, 103, 104, 103]);
183
184        assert_eq!(script_find_and_remove(&mut v, &[103, 104]), 2);
185        assert_eq!(v, vec![103]);
186
187        assert_eq!(script_find_and_remove(&mut v, &[105, 105, 5]), 0);
188        assert_eq!(script_find_and_remove(&mut v, &[105]), 0);
189        assert_eq!(script_find_and_remove(&mut v, &[103]), 1);
190        assert_eq!(v, vec![]);
191
192        assert_eq!(script_find_and_remove(&mut v, &[105, 105, 5]), 0);
193        assert_eq!(script_find_and_remove(&mut v, &[105]), 0);
194    }
195
196    #[test]
197    fn test_script_codesep_remove() {
198        let mut s = vec![33u8, 3, 132, 121, 160, 250, 153, 140, 211, 82, 89, 162, 239, 10, 122, 92, 104, 102, 44, 20, 116, 248, 140, 203, 109, 8, 167, 103, 123, 190, 199, 242, 32, 65, 173, 171, 33, 3, 132, 121, 160, 250, 153, 140, 211, 82, 89, 162, 239, 10, 122, 92, 104, 102, 44, 20, 116, 248, 140, 203, 109, 8, 167, 103, 123, 190, 199, 242, 32, 65, 173, 171, 81];
199        assert_eq!(script_find_and_remove(&mut s, &[171]), 2);
200        assert_eq!(s, vec![33, 3, 132, 121, 160, 250, 153, 140, 211, 82, 89, 162, 239, 10, 122, 92, 104, 102, 44, 20, 116, 248, 140, 203, 109, 8, 167, 103, 123, 190, 199, 242, 32, 65, 173, 33, 3, 132, 121, 160, 250, 153, 140, 211, 82, 89, 162, 239, 10, 122, 92, 104, 102, 44, 20, 116, 248, 140, 203, 109, 8, 167, 103, 123, 190, 199, 242, 32, 65, 173, 81]);
201    }
202
203    #[test]
204    fn test_hex_bytes() {
205        assert_eq!(&hex_bytes("abcd").unwrap(), &[171u8, 205]);
206        assert!(hex_bytes("abcde").is_err());
207        assert!(hex_bytes("aBcDeF").is_ok());
208        assert!(hex_bytes("aBcD4eFL").is_err());
209    }
210}
211