cherry_evm_validate/
lib.rs

1use anyhow::{anyhow, Context, Result};
2use arrow::{
3    array::{Array, BinaryArray, UInt64Array},
4    record_batch::RecordBatch,
5};
6
7/// Checks that:
8///
9/// - Everything is ordered by (block_number, tx_index/log_index)
10///
11/// - No gaps in (block_number, tx_index/log_index)
12///
13/// - block_hash/tx_hash matches with block_number/(block_number, tx_index)
14///
15/// - parent hash matches with previous block's hash
16///
17pub fn validate_block_data(
18    blocks: &RecordBatch,
19    transactions: &RecordBatch,
20    logs: &RecordBatch,
21    traces: &RecordBatch,
22) -> Result<()> {
23    let block_numbers = blocks
24        .column_by_name("number")
25        .context("get block number column")?
26        .as_any()
27        .downcast_ref::<UInt64Array>()
28        .context("get block number column as u64")?;
29
30    if block_numbers.null_count() > 0 {
31        return Err(anyhow!("block.number column can't have nulls"));
32    }
33
34    let first_block_num = block_numbers
35        .iter()
36        .next()
37        .map(Option::unwrap)
38        .unwrap_or_default();
39    let mut current_bn = first_block_num;
40    for bn in block_numbers.iter().skip(1) {
41        let bn = bn.unwrap();
42        if current_bn + 1 != bn {
43            return Err(anyhow!(
44                "block.number column is not consistent. {} != {}",
45                current_bn + 1,
46                bn
47            ));
48        }
49        current_bn = bn;
50    }
51
52    let block_hashes = blocks
53        .column_by_name("hash")
54        .context("get block hash column")?
55        .as_any()
56        .downcast_ref::<BinaryArray>()
57        .context("get block hash as binary array")?;
58
59    let block_parent_hashes = blocks
60        .column_by_name("parent_hash")
61        .context("get block parent_hash column")?
62        .as_any()
63        .downcast_ref::<BinaryArray>()
64        .context("get block parent_hash as binary array")?;
65
66    let get_block_hash = |block_num: u64| -> Option<&[u8]> {
67        let pos = usize::try_from(block_num.checked_sub(first_block_num)?).unwrap();
68        if pos < block_hashes.len() {
69            Some(block_hashes.value(pos))
70        } else {
71            None
72        }
73    };
74
75    if block_hashes.null_count() > 0 {
76        return Err(anyhow!("block.hash column can't have nulls"));
77    }
78
79    if block_parent_hashes.null_count() > 0 {
80        return Err(anyhow!("block.parent_has column can't have nulls"));
81    }
82
83    for (expected_parent_hash, parent_hash) in
84        block_hashes.iter().zip(block_parent_hashes.iter().skip(1))
85    {
86        let expected_parent_hash = expected_parent_hash.unwrap();
87        let parent_hash = parent_hash.unwrap();
88        if expected_parent_hash != parent_hash {
89            return Err(anyhow!(
90                "bad parent hash found. expected {}, found {}",
91                faster_hex::hex_string(expected_parent_hash),
92                faster_hex::hex_string(parent_hash)
93            ));
94        }
95    }
96
97    validate_block_hashes(get_block_hash, transactions).context("validate tx block hashes")?;
98    validate_block_hashes(get_block_hash, logs).context("validate log block hashes")?;
99    validate_block_hashes(get_block_hash, traces).context("validate trace block hashes")?;
100
101    // Validate tx ordering and check tx hashes of other tables
102
103    let mut tx_hash_mapping = vec![Vec::<[u8; 32]>::with_capacity(200); block_numbers.len()];
104
105    let tx_hashes = transactions
106        .column_by_name("hash")
107        .context("get tx hash col")?
108        .as_any()
109        .downcast_ref::<BinaryArray>()
110        .context("tx hash col as binary")?;
111    let tx_block_nums = transactions
112        .column_by_name("block_number")
113        .context("get tx block num col")?
114        .as_any()
115        .downcast_ref::<UInt64Array>()
116        .context("get tx block num col as u64")?;
117    let tx_indices = transactions
118        .column_by_name("transaction_index")
119        .context("get tx index column")?
120        .as_any()
121        .downcast_ref::<UInt64Array>()
122        .context("get tx index col as u64")?;
123
124    if tx_hashes.null_count() > 0 {
125        return Err(anyhow!("tx hash column can't have nulls"));
126    }
127    if tx_block_nums.null_count() > 0 {
128        return Err(anyhow!("tx block number column can't have nulls"));
129    }
130    if tx_indices.null_count() > 0 {
131        return Err(anyhow!("tx index column can't have nulls"));
132    }
133
134    let mut expected_tx_index = 0;
135    let mut current_block_num = first_block_num;
136
137    for ((tx_hash, tx_bn), tx_idx) in tx_hashes
138        .iter()
139        .zip(tx_block_nums.iter())
140        .zip(tx_indices.iter())
141    {
142        let tx_hash = tx_hash.unwrap();
143        let tx_bn = tx_bn.unwrap();
144        let tx_idx = tx_idx.unwrap();
145
146        if tx_bn != current_block_num {
147            if tx_bn < current_block_num {
148                return Err(anyhow!(
149                    "found wrong ordering in tx block numbers after block num {}",
150                    current_block_num
151                ));
152            }
153
154            current_block_num = tx_bn;
155            expected_tx_index = 0;
156        }
157
158        if tx_idx != expected_tx_index {
159            return Err(anyhow!(
160                "found unexpected tx index at the start of block {}",
161                current_block_num
162            ));
163        }
164        expected_tx_index += 1;
165
166        let block_pos = tx_bn
167            .checked_sub(first_block_num)
168            .with_context(|| format!("unexpected block num {} in transactions", tx_bn))?;
169        let mappings = tx_hash_mapping
170            .get_mut(usize::try_from(block_pos).unwrap())
171            .unwrap();
172
173        assert_eq!(mappings.len(), usize::try_from(tx_idx).unwrap());
174
175        if tx_hash.len() != 32 {
176            return Err(anyhow!("found bad tx hash at {},{}", tx_bn, tx_idx));
177        }
178
179        mappings.push(tx_hash.try_into().unwrap());
180    }
181
182    validate_transaction_hashes(first_block_num, &tx_hash_mapping, logs, "transaction_index")
183        .context("check tx hashes in logs")?;
184    validate_transaction_hashes(
185        first_block_num,
186        &tx_hash_mapping,
187        traces,
188        "transcation_position",
189    )
190    .context("check tx hashes in traces")?;
191
192    // VALIDATE LOG ORDERING
193
194    let log_block_nums = logs
195        .column_by_name("block_number")
196        .context("get log block num col")?
197        .as_any()
198        .downcast_ref::<UInt64Array>()
199        .context("get log block num col as u64")?;
200    let log_indices = logs
201        .column_by_name("log_index")
202        .context("get log index column")?
203        .as_any()
204        .downcast_ref::<UInt64Array>()
205        .context("get log index col as u64")?;
206
207    if log_block_nums.null_count() > 0 {
208        return Err(anyhow!("log block number column can't have nulls"));
209    }
210    if log_indices.null_count() > 0 {
211        return Err(anyhow!("log index column can't have nulls"));
212    }
213
214    let mut expected_log_index = 0;
215    let mut current_block_num = first_block_num;
216
217    for (log_idx, log_bn) in log_indices.iter().zip(log_block_nums.iter()) {
218        let log_idx = log_idx.unwrap();
219        let log_bn = log_bn.unwrap();
220
221        if log_bn != current_block_num {
222            if log_bn < current_block_num {
223                return Err(anyhow!(
224                    "found wrong ordering in log block numbers after block num {}",
225                    current_block_num
226                ));
227            }
228
229            expected_log_index = 0;
230            current_block_num = log_bn;
231        }
232
233        if log_idx != expected_log_index {
234            return Err(anyhow!(
235                "found unexpected log index, expected {},{} but got {} for index",
236                log_bn,
237                expected_log_index,
238                log_idx
239            ));
240        }
241        expected_log_index += 1;
242    }
243
244    // VALIDATE TRACE ORDERING
245
246    let trace_block_nums = traces
247        .column_by_name("block_number")
248        .context("get trace block num col")?
249        .as_any()
250        .downcast_ref::<UInt64Array>()
251        .context("get trace block num col as u64")?;
252    let trace_tx_indices = traces
253        .column_by_name("transaction_position")
254        .context("get trace tx index column")?
255        .as_any()
256        .downcast_ref::<UInt64Array>()
257        .context("get trace tx index col as u64")?;
258
259    if trace_block_nums.null_count() > 0 {
260        return Err(anyhow!("log block number column can't have nulls"));
261    }
262
263    let mut current_tx_pos = 0;
264    let mut current_block_num = first_block_num;
265
266    for (trace_bn, trace_tx_pos) in trace_block_nums.iter().zip(trace_tx_indices.iter()) {
267        let prev_bn = current_block_num;
268
269        let trace_bn = trace_bn.unwrap();
270
271        if trace_bn != current_block_num {
272            if trace_bn < current_block_num {
273                return Err(anyhow!(
274                    "found wrong ordering in trace block numbers after block num {}",
275                    current_block_num
276                ));
277            }
278
279            current_tx_pos = 0;
280            current_block_num = trace_bn;
281        }
282
283        let tx_pos = match trace_tx_pos {
284            Some(x) => x,
285            // This can be None for block reward traces and maybe for other traces that don't associate to blocks for some reason
286            None => continue,
287        };
288
289        if tx_pos < current_tx_pos {
290            return Err(anyhow!(
291                "found bad tx position ordering after {},{}",
292                prev_bn,
293                current_tx_pos
294            ));
295        }
296        current_tx_pos = tx_pos;
297    }
298
299    Ok(())
300}
301
302fn validate_block_hashes<'a, F: Fn(u64) -> Option<&'a [u8]>>(
303    get_block_hash: F,
304    data: &RecordBatch,
305) -> Result<()> {
306    let block_hashes = data
307        .column_by_name("block_hash")
308        .context("get block hash column")?
309        .as_any()
310        .downcast_ref::<BinaryArray>()
311        .context("block hash col as binary")?;
312    let block_numbers = data
313        .column_by_name("block_number")
314        .context("get block number column")?
315        .as_any()
316        .downcast_ref::<UInt64Array>()
317        .context("block number as u64")?;
318
319    if block_hashes.null_count() > 0 {
320        return Err(anyhow!("block hash column can't have nulls"));
321    }
322
323    if block_numbers.null_count() > 0 {
324        return Err(anyhow!("block number column can't have nulls"));
325    }
326
327    for (bn, hash) in block_numbers.iter().zip(block_hashes.iter()) {
328        let bn = bn.unwrap();
329        let hash = hash.unwrap();
330
331        let expected = match get_block_hash(bn) {
332            Some(h) => h,
333            None => {
334                return Err(anyhow!("couldn't find expected hash for block {}", bn));
335            }
336        };
337
338        if expected != hash {
339            return Err(anyhow!(
340                "block hash mismatch at block {}. expected {} got {}",
341                bn,
342                faster_hex::hex_string(expected),
343                faster_hex::hex_string(hash)
344            ));
345        }
346    }
347
348    Ok(())
349}
350
351fn validate_transaction_hashes(
352    first_block_num: u64,
353    expected_tx_hashes: &[Vec<[u8; 32]>],
354    data: &RecordBatch,
355    tx_index_col_name: &str,
356) -> Result<()> {
357    let tx_indices = data
358        .column_by_name(tx_index_col_name)
359        .context("get tx index column")?
360        .as_any()
361        .downcast_ref::<UInt64Array>()
362        .context("get tx index col as u64")?;
363    let block_numbers = data
364        .column_by_name("block_number")
365        .context("get block number column")?
366        .as_any()
367        .downcast_ref::<UInt64Array>()
368        .context("block number as u64")?;
369    let tx_hashes = data
370        .column_by_name("transaction_hash")
371        .context("get tx hash column")?
372        .as_any()
373        .downcast_ref::<BinaryArray>()
374        .context("get tx hash col as binary")?;
375
376    if block_numbers.null_count() > 0 {
377        return Err(anyhow!("block number column can't have nulls"));
378    }
379
380    for ((tx_idx, tx_hash), bn) in tx_indices
381        .iter()
382        .zip(tx_hashes.iter())
383        .zip(block_numbers.iter())
384    {
385        // Skip entries that don't associate to transactions, e.g. block reward traces.
386        if let Some(tx_idx) = tx_idx {
387            let bn = bn.unwrap();
388            let tx_hash = match tx_hash {
389                Some(h) => h,
390                None => {
391                    return Err(anyhow!("tx hash no found for tx {},{}", bn, tx_idx));
392                }
393            };
394
395            let block_i = match bn.checked_sub(first_block_num) {
396                Some(i) => usize::try_from(i).unwrap(),
397                None => return Err(anyhow!("bad block num: {}", bn)),
398            };
399
400            let expected_tx_hash = expected_tx_hashes
401                .get(block_i)
402                .with_context(|| format!("block {} not found in given data", bn))?
403                .get(usize::try_from(tx_idx).unwrap())
404                .with_context(|| format!("tx hash data for tx {},{} not found", bn, tx_idx))?;
405
406            if expected_tx_hash != tx_hash {
407                return Err(anyhow!(
408                    "tx hash mismatch for tx {},{}. Expected {}, Found {}",
409                    bn,
410                    tx_idx,
411                    faster_hex::hex_string(expected_tx_hash),
412                    faster_hex::hex_string(tx_hash)
413                ));
414            }
415        }
416    }
417
418    Ok(())
419}