cherry_evm_validate/
lib.rs1use anyhow::{anyhow, Context, Result};
2use arrow::{
3 array::{Array, BinaryArray, UInt64Array},
4 record_batch::RecordBatch,
5};
6
7pub fn validate_block_data(
18 blocks: &RecordBatch,
19 transactions: &RecordBatch,
20 logs: &RecordBatch,
21 traces: &RecordBatch,
22) -> Result<()> {
23 let block_numbers = blocks
24 .column_by_name("number")
25 .context("get block number column")?
26 .as_any()
27 .downcast_ref::<UInt64Array>()
28 .context("get block number column as u64")?;
29
30 if block_numbers.null_count() > 0 {
31 return Err(anyhow!("block.number column can't have nulls"));
32 }
33
34 let first_block_num = block_numbers
35 .iter()
36 .next()
37 .map(Option::unwrap)
38 .unwrap_or_default();
39 let mut current_bn = first_block_num;
40 for bn in block_numbers.iter().skip(1) {
41 let bn = bn.unwrap();
42 if current_bn + 1 != bn {
43 return Err(anyhow!(
44 "block.number column is not consistent. {} != {}",
45 current_bn + 1,
46 bn
47 ));
48 }
49 current_bn = bn;
50 }
51
52 let block_hashes = blocks
53 .column_by_name("hash")
54 .context("get block hash column")?
55 .as_any()
56 .downcast_ref::<BinaryArray>()
57 .context("get block hash as binary array")?;
58
59 let block_parent_hashes = blocks
60 .column_by_name("parent_hash")
61 .context("get block parent_hash column")?
62 .as_any()
63 .downcast_ref::<BinaryArray>()
64 .context("get block parent_hash as binary array")?;
65
66 let get_block_hash = |block_num: u64| -> Option<&[u8]> {
67 let pos = usize::try_from(block_num.checked_sub(first_block_num)?).unwrap();
68 if pos < block_hashes.len() {
69 Some(block_hashes.value(pos))
70 } else {
71 None
72 }
73 };
74
75 if block_hashes.null_count() > 0 {
76 return Err(anyhow!("block.hash column can't have nulls"));
77 }
78
79 if block_parent_hashes.null_count() > 0 {
80 return Err(anyhow!("block.parent_has column can't have nulls"));
81 }
82
83 for (expected_parent_hash, parent_hash) in
84 block_hashes.iter().zip(block_parent_hashes.iter().skip(1))
85 {
86 let expected_parent_hash = expected_parent_hash.unwrap();
87 let parent_hash = parent_hash.unwrap();
88 if expected_parent_hash != parent_hash {
89 return Err(anyhow!(
90 "bad parent hash found. expected {}, found {}",
91 faster_hex::hex_string(expected_parent_hash),
92 faster_hex::hex_string(parent_hash)
93 ));
94 }
95 }
96
97 validate_block_hashes(get_block_hash, transactions).context("validate tx block hashes")?;
98 validate_block_hashes(get_block_hash, logs).context("validate log block hashes")?;
99 validate_block_hashes(get_block_hash, traces).context("validate trace block hashes")?;
100
101 let mut tx_hash_mapping = vec![Vec::<[u8; 32]>::with_capacity(200); block_numbers.len()];
104
105 let tx_hashes = transactions
106 .column_by_name("hash")
107 .context("get tx hash col")?
108 .as_any()
109 .downcast_ref::<BinaryArray>()
110 .context("tx hash col as binary")?;
111 let tx_block_nums = transactions
112 .column_by_name("block_number")
113 .context("get tx block num col")?
114 .as_any()
115 .downcast_ref::<UInt64Array>()
116 .context("get tx block num col as u64")?;
117 let tx_indices = transactions
118 .column_by_name("transaction_index")
119 .context("get tx index column")?
120 .as_any()
121 .downcast_ref::<UInt64Array>()
122 .context("get tx index col as u64")?;
123
124 if tx_hashes.null_count() > 0 {
125 return Err(anyhow!("tx hash column can't have nulls"));
126 }
127 if tx_block_nums.null_count() > 0 {
128 return Err(anyhow!("tx block number column can't have nulls"));
129 }
130 if tx_indices.null_count() > 0 {
131 return Err(anyhow!("tx index column can't have nulls"));
132 }
133
134 let mut expected_tx_index = 0;
135 let mut current_block_num = first_block_num;
136
137 for ((tx_hash, tx_bn), tx_idx) in tx_hashes
138 .iter()
139 .zip(tx_block_nums.iter())
140 .zip(tx_indices.iter())
141 {
142 let tx_hash = tx_hash.unwrap();
143 let tx_bn = tx_bn.unwrap();
144 let tx_idx = tx_idx.unwrap();
145
146 if tx_bn != current_block_num {
147 if tx_bn < current_block_num {
148 return Err(anyhow!(
149 "found wrong ordering in tx block numbers after block num {}",
150 current_block_num
151 ));
152 }
153
154 current_block_num = tx_bn;
155 expected_tx_index = 0;
156 }
157
158 if tx_idx != expected_tx_index {
159 return Err(anyhow!(
160 "found unexpected tx index at the start of block {}",
161 current_block_num
162 ));
163 }
164 expected_tx_index += 1;
165
166 let block_pos = tx_bn
167 .checked_sub(first_block_num)
168 .with_context(|| format!("unexpected block num {} in transactions", tx_bn))?;
169 let mappings = tx_hash_mapping
170 .get_mut(usize::try_from(block_pos).unwrap())
171 .unwrap();
172
173 assert_eq!(mappings.len(), usize::try_from(tx_idx).unwrap());
174
175 if tx_hash.len() != 32 {
176 return Err(anyhow!("found bad tx hash at {},{}", tx_bn, tx_idx));
177 }
178
179 mappings.push(tx_hash.try_into().unwrap());
180 }
181
182 validate_transaction_hashes(first_block_num, &tx_hash_mapping, logs, "transaction_index")
183 .context("check tx hashes in logs")?;
184 validate_transaction_hashes(
185 first_block_num,
186 &tx_hash_mapping,
187 traces,
188 "transcation_position",
189 )
190 .context("check tx hashes in traces")?;
191
192 let log_block_nums = logs
195 .column_by_name("block_number")
196 .context("get log block num col")?
197 .as_any()
198 .downcast_ref::<UInt64Array>()
199 .context("get log block num col as u64")?;
200 let log_indices = logs
201 .column_by_name("log_index")
202 .context("get log index column")?
203 .as_any()
204 .downcast_ref::<UInt64Array>()
205 .context("get log index col as u64")?;
206
207 if log_block_nums.null_count() > 0 {
208 return Err(anyhow!("log block number column can't have nulls"));
209 }
210 if log_indices.null_count() > 0 {
211 return Err(anyhow!("log index column can't have nulls"));
212 }
213
214 let mut expected_log_index = 0;
215 let mut current_block_num = first_block_num;
216
217 for (log_idx, log_bn) in log_indices.iter().zip(log_block_nums.iter()) {
218 let log_idx = log_idx.unwrap();
219 let log_bn = log_bn.unwrap();
220
221 if log_bn != current_block_num {
222 if log_bn < current_block_num {
223 return Err(anyhow!(
224 "found wrong ordering in log block numbers after block num {}",
225 current_block_num
226 ));
227 }
228
229 expected_log_index = 0;
230 current_block_num = log_bn;
231 }
232
233 if log_idx != expected_log_index {
234 return Err(anyhow!(
235 "found unexpected log index, expected {},{} but got {} for index",
236 log_bn,
237 expected_log_index,
238 log_idx
239 ));
240 }
241 expected_log_index += 1;
242 }
243
244 let trace_block_nums = traces
247 .column_by_name("block_number")
248 .context("get trace block num col")?
249 .as_any()
250 .downcast_ref::<UInt64Array>()
251 .context("get trace block num col as u64")?;
252 let trace_tx_indices = traces
253 .column_by_name("transaction_position")
254 .context("get trace tx index column")?
255 .as_any()
256 .downcast_ref::<UInt64Array>()
257 .context("get trace tx index col as u64")?;
258
259 if trace_block_nums.null_count() > 0 {
260 return Err(anyhow!("log block number column can't have nulls"));
261 }
262
263 let mut current_tx_pos = 0;
264 let mut current_block_num = first_block_num;
265
266 for (trace_bn, trace_tx_pos) in trace_block_nums.iter().zip(trace_tx_indices.iter()) {
267 let prev_bn = current_block_num;
268
269 let trace_bn = trace_bn.unwrap();
270
271 if trace_bn != current_block_num {
272 if trace_bn < current_block_num {
273 return Err(anyhow!(
274 "found wrong ordering in trace block numbers after block num {}",
275 current_block_num
276 ));
277 }
278
279 current_tx_pos = 0;
280 current_block_num = trace_bn;
281 }
282
283 let tx_pos = match trace_tx_pos {
284 Some(x) => x,
285 None => continue,
287 };
288
289 if tx_pos < current_tx_pos {
290 return Err(anyhow!(
291 "found bad tx position ordering after {},{}",
292 prev_bn,
293 current_tx_pos
294 ));
295 }
296 current_tx_pos = tx_pos;
297 }
298
299 Ok(())
300}
301
302fn validate_block_hashes<'a, F: Fn(u64) -> Option<&'a [u8]>>(
303 get_block_hash: F,
304 data: &RecordBatch,
305) -> Result<()> {
306 let block_hashes = data
307 .column_by_name("block_hash")
308 .context("get block hash column")?
309 .as_any()
310 .downcast_ref::<BinaryArray>()
311 .context("block hash col as binary")?;
312 let block_numbers = data
313 .column_by_name("block_number")
314 .context("get block number column")?
315 .as_any()
316 .downcast_ref::<UInt64Array>()
317 .context("block number as u64")?;
318
319 if block_hashes.null_count() > 0 {
320 return Err(anyhow!("block hash column can't have nulls"));
321 }
322
323 if block_numbers.null_count() > 0 {
324 return Err(anyhow!("block number column can't have nulls"));
325 }
326
327 for (bn, hash) in block_numbers.iter().zip(block_hashes.iter()) {
328 let bn = bn.unwrap();
329 let hash = hash.unwrap();
330
331 let expected = match get_block_hash(bn) {
332 Some(h) => h,
333 None => {
334 return Err(anyhow!("couldn't find expected hash for block {}", bn));
335 }
336 };
337
338 if expected != hash {
339 return Err(anyhow!(
340 "block hash mismatch at block {}. expected {} got {}",
341 bn,
342 faster_hex::hex_string(expected),
343 faster_hex::hex_string(hash)
344 ));
345 }
346 }
347
348 Ok(())
349}
350
351fn validate_transaction_hashes(
352 first_block_num: u64,
353 expected_tx_hashes: &[Vec<[u8; 32]>],
354 data: &RecordBatch,
355 tx_index_col_name: &str,
356) -> Result<()> {
357 let tx_indices = data
358 .column_by_name(tx_index_col_name)
359 .context("get tx index column")?
360 .as_any()
361 .downcast_ref::<UInt64Array>()
362 .context("get tx index col as u64")?;
363 let block_numbers = data
364 .column_by_name("block_number")
365 .context("get block number column")?
366 .as_any()
367 .downcast_ref::<UInt64Array>()
368 .context("block number as u64")?;
369 let tx_hashes = data
370 .column_by_name("transaction_hash")
371 .context("get tx hash column")?
372 .as_any()
373 .downcast_ref::<BinaryArray>()
374 .context("get tx hash col as binary")?;
375
376 if block_numbers.null_count() > 0 {
377 return Err(anyhow!("block number column can't have nulls"));
378 }
379
380 for ((tx_idx, tx_hash), bn) in tx_indices
381 .iter()
382 .zip(tx_hashes.iter())
383 .zip(block_numbers.iter())
384 {
385 if let Some(tx_idx) = tx_idx {
387 let bn = bn.unwrap();
388 let tx_hash = match tx_hash {
389 Some(h) => h,
390 None => {
391 return Err(anyhow!("tx hash no found for tx {},{}", bn, tx_idx));
392 }
393 };
394
395 let block_i = match bn.checked_sub(first_block_num) {
396 Some(i) => usize::try_from(i).unwrap(),
397 None => return Err(anyhow!("bad block num: {}", bn)),
398 };
399
400 let expected_tx_hash = expected_tx_hashes
401 .get(block_i)
402 .with_context(|| format!("block {} not found in given data", bn))?
403 .get(usize::try_from(tx_idx).unwrap())
404 .with_context(|| format!("tx hash data for tx {},{} not found", bn, tx_idx))?;
405
406 if expected_tx_hash != tx_hash {
407 return Err(anyhow!(
408 "tx hash mismatch for tx {},{}. Expected {}, Found {}",
409 bn,
410 tx_idx,
411 faster_hex::hex_string(expected_tx_hash),
412 faster_hex::hex_string(tx_hash)
413 ));
414 }
415 }
416 }
417
418 Ok(())
419}