Skip to main content

chaincodec_evm/
decoder.rs

1//! `EvmDecoder` — the ChainDecoder implementation for all EVM chains.
2
3use alloy_core::dyn_abi::{DynSolEvent, DynSolType, DynSolValue};
4use alloy_primitives::B256;
5use chaincodec_core::{
6    chain::ChainFamily,
7    decoder::{BatchDecodeResult, ChainDecoder, ErrorMode, ProgressCallback},
8    error::{BatchDecodeError, DecodeError},
9    event::{DecodedEvent, EventFingerprint, RawEvent},
10    schema::{Schema, SchemaRegistry},
11    types::{CanonicalType, NormalizedValue},
12};
13use rayon::prelude::*;
14use std::collections::HashMap;
15
16use crate::{fingerprint, normalizer};
17
/// Decoder for logs emitted on EVM-family chains.
///
/// This is a field-less unit struct, so cloning it copies no data and a
/// single value can be handed to as many call sites as needed.
#[derive(Clone, Debug, Default)]
pub struct EvmDecoder;
22
23impl EvmDecoder {
24    pub fn new() -> Self {
25        Self
26    }
27
28    /// Build alloy `DynSolType` from a ChainCodec `CanonicalType`.
29    fn canonical_to_dyn(ty: &CanonicalType) -> Result<DynSolType, DecodeError> {
30        match ty {
31            CanonicalType::Uint(bits) => Ok(DynSolType::Uint(*bits as usize)),
32            CanonicalType::Int(bits) => Ok(DynSolType::Int(*bits as usize)),
33            CanonicalType::Bool => Ok(DynSolType::Bool),
34            CanonicalType::Bytes(n) => Ok(DynSolType::FixedBytes(*n as usize)),
35            CanonicalType::BytesVec => Ok(DynSolType::Bytes),
36            CanonicalType::Str => Ok(DynSolType::String),
37            CanonicalType::Address => Ok(DynSolType::Address),
38            CanonicalType::Array { elem, len } => {
39                let inner = Self::canonical_to_dyn(elem)?;
40                Ok(DynSolType::FixedArray(Box::new(inner), *len as usize))
41            }
42            CanonicalType::Vec(elem) => {
43                let inner = Self::canonical_to_dyn(elem)?;
44                Ok(DynSolType::Array(Box::new(inner)))
45            }
46            CanonicalType::Tuple(fields) => {
47                let types: Result<Vec<DynSolType>, _> =
48                    fields.iter().map(|(_, t)| Self::canonical_to_dyn(t)).collect();
49                Ok(DynSolType::Tuple(types?))
50            }
51            CanonicalType::Hash256 => Ok(DynSolType::FixedBytes(32)),
52            CanonicalType::Timestamp => Ok(DynSolType::Uint(256)),
53            // Pubkey / Bech32 don't exist in EVM — treat as bytes
54            CanonicalType::Pubkey | CanonicalType::Bech32Address => Ok(DynSolType::Bytes),
55            CanonicalType::Decimal { .. } => Ok(DynSolType::Uint(256)),
56        }
57    }
58
59    /// Decode the EVM log data (non-indexed params) as an ABI-encoded tuple.
60    fn decode_data(
61        &self,
62        raw_data: &[u8],
63        data_fields: &[(&str, &chaincodec_core::schema::FieldDef)],
64    ) -> Result<HashMap<String, NormalizedValue>, DecodeError> {
65        if data_fields.is_empty() {
66            return Ok(HashMap::new());
67        }
68
69        let tuple_types: Result<Vec<DynSolType>, _> = data_fields
70            .iter()
71            .map(|(_, f)| Self::canonical_to_dyn(&f.ty))
72            .collect();
73        let tuple_types = tuple_types?;
74
75        let tuple_type = DynSolType::Tuple(tuple_types);
76        let decoded = tuple_type
77            .abi_decode(raw_data)
78            .map_err(|e| DecodeError::AbiDecodeFailed {
79                reason: e.to_string(),
80            })?;
81
82        let values = match decoded {
83            DynSolValue::Tuple(vals) => vals,
84            other => vec![other],
85        };
86
87        let mut out = HashMap::new();
88        for ((name, _), val) in data_fields.iter().zip(values.into_iter()) {
89            out.insert(name.to_string(), normalizer::normalize(val));
90        }
91        Ok(out)
92    }
93
94    /// Decode a single indexed topic (always 32 bytes, ABI-encoded).
95    ///
96    /// # EVM ABI indexed-parameter encoding rules
97    /// - **Value types** (uint, int, bool, address, bytes1–bytes32): padded to
98    ///   32 bytes, stored directly — we can ABI-decode and recover the value.
99    /// - **Reference types** (string, bytes, arrays, tuples): stored as the
100    ///   `keccak256` of their ABI-encoded form — the original value is
101    ///   **unrecoverable**. We return the raw 32-byte hash as `Bytes`.
102    fn decode_topic(
103        &self,
104        topic_hex: &str,
105        ty: &CanonicalType,
106    ) -> Result<NormalizedValue, DecodeError> {
107        let hex = topic_hex.strip_prefix("0x").unwrap_or(topic_hex);
108        let bytes = hex::decode(hex).map_err(|e| DecodeError::InvalidRawEvent {
109            reason: format!("invalid topic hex: {e}"),
110        })?;
111
112        // Reference types are hashed in indexed position — return raw bytes.
113        match ty {
114            CanonicalType::Str
115            | CanonicalType::BytesVec
116            | CanonicalType::Vec(_)
117            | CanonicalType::Array { .. }
118            | CanonicalType::Tuple(_) => {
119                return Ok(NormalizedValue::Bytes(bytes));
120            }
121            _ => {}
122        }
123
124        let dyn_type = Self::canonical_to_dyn(ty)?;
125        // Value types: ABI-encoded into exactly 32 bytes
126        match dyn_type.abi_decode(&bytes) {
127            Ok(val) => Ok(normalizer::normalize(val)),
128            Err(e) => Err(DecodeError::AbiDecodeFailed {
129                reason: format!("topic decode: {e}"),
130            }),
131        }
132    }
133}
134
135impl ChainDecoder for EvmDecoder {
136    fn chain_family(&self) -> ChainFamily {
137        ChainFamily::Evm
138    }
139
140    fn fingerprint(&self, raw: &RawEvent) -> EventFingerprint {
141        fingerprint::from_topics(&raw.topics)
142            .unwrap_or_else(|| EventFingerprint::new("0x".repeat(32)))
143    }
144
145    fn decode_event(
146        &self,
147        raw: &RawEvent,
148        schema: &Schema,
149    ) -> Result<DecodedEvent, DecodeError> {
150        let mut fields: HashMap<String, NormalizedValue> = HashMap::new();
151        let mut decode_errors: HashMap<String, String> = HashMap::new();
152
153        // Indexed fields → topics[1..]
154        let indexed_fields = schema.indexed_fields();
155        for (i, (name, field_def)) in indexed_fields.iter().enumerate() {
156            let topic_idx = i + 1; // topics[0] is the event sig
157            match raw.topics.get(topic_idx) {
158                Some(topic) => match self.decode_topic(topic, &field_def.ty) {
159                    Ok(val) => { fields.insert(name.to_string(), val); }
160                    Err(e) => { decode_errors.insert(name.to_string(), e.to_string()); }
161                },
162                None => {
163                    if !field_def.nullable {
164                        return Err(DecodeError::MissingField { field: name.to_string() });
165                    }
166                    fields.insert(name.to_string(), NormalizedValue::Null);
167                }
168            }
169        }
170
171        // Non-indexed fields → data payload
172        let data_fields = schema.data_fields();
173        match self.decode_data(&raw.data, &data_fields) {
174            Ok(decoded) => fields.extend(decoded),
175            Err(e) => {
176                // Record error but still return partial result
177                decode_errors.insert("__data__".into(), e.to_string());
178            }
179        }
180
181        Ok(DecodedEvent {
182            chain: raw.chain.clone(),
183            schema: schema.name.clone(),
184            schema_version: schema.version,
185            tx_hash: raw.tx_hash.clone(),
186            block_number: raw.block_number,
187            block_timestamp: raw.block_timestamp,
188            log_index: raw.log_index,
189            address: raw.address.clone(),
190            fields,
191            fingerprint: self.fingerprint(raw),
192            decode_errors,
193        })
194    }
195
196    /// Override default batch with Rayon parallel decode.
197    fn decode_batch(
198        &self,
199        logs: &[RawEvent],
200        registry: &dyn SchemaRegistry,
201        mode: ErrorMode,
202        progress: Option<&dyn ProgressCallback>,
203    ) -> Result<BatchDecodeResult, BatchDecodeError> {
204        // Parallel decode using Rayon; fall back to sequential for progress callbacks
205        // because Rayon threads can't share the callback cleanly.
206        if progress.is_some() {
207            // Use the default sequential implementation when progress tracking is needed
208            return chaincodec_core::decoder::ChainDecoder::decode_batch(
209                self, logs, registry, mode, progress,
210            );
211        }
212
213        let results: Vec<(usize, Result<DecodedEvent, DecodeError>)> = logs
214            .par_iter()
215            .enumerate()
216            .map(|(idx, raw)| {
217                let fp = self.fingerprint(raw);
218                let schema = registry.get_by_fingerprint(&fp);
219                match schema {
220                    None => (
221                        idx,
222                        Err(DecodeError::SchemaNotFound {
223                            fingerprint: fp.to_string(),
224                        }),
225                    ),
226                    Some(s) => (idx, self.decode_event(raw, &s)),
227                }
228            })
229            .collect();
230
231        let mut events = Vec::with_capacity(logs.len());
232        let mut errors = Vec::new();
233
234        for (idx, result) in results {
235            match result {
236                Ok(event) => events.push(event),
237                Err(err) => match mode {
238                    ErrorMode::Skip => {}
239                    ErrorMode::Collect => errors.push((idx, err)),
240                    ErrorMode::Throw => {
241                        return Err(BatchDecodeError::ItemFailed {
242                            index: idx,
243                            source: err,
244                        });
245                    }
246                },
247            }
248        }
249
250        Ok(BatchDecodeResult { events, errors })
251    }
252
253    fn supports_abi_guess(&self) -> bool {
254        // Future: use 4byte.directory / samczsun's ABI lookup
255        false
256    }
257}
258
#[cfg(test)]
mod tests {
    use super::*;
    use chaincodec_core::chain::chains;

    /// topic0 of `Transfer(address,address,uint256)`.
    const TRANSFER_TOPIC0: &str =
        "0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef";

    /// Build a simplified ERC-20 `Transfer` log: two indexed addresses in the
    /// topics and a single uint256 amount in the data payload.
    fn erc20_transfer_raw() -> RawEvent {
        // 10^18 as a 32-byte big-endian uint256 word: the u64 occupies the
        // last 8 bytes, the rest stays zero.
        let amount: u64 = 1_000_000_000_000_000_000;
        let mut word = [0u8; 32];
        word[24..].copy_from_slice(&amount.to_be_bytes());

        RawEvent {
            chain: chains::ethereum(),
            tx_hash: "0xabc123".into(),
            block_number: 19_000_000,
            block_timestamp: 1_700_000_000,
            log_index: 0,
            topics: vec![
                TRANSFER_TOPIC0.into(),
                // from (address left-padded to 32 bytes)
                "0x000000000000000000000000d8da6bf26964af9d7eed9e03e53415d37aa96045".into(),
                // to (address left-padded to 32 bytes)
                "0x000000000000000000000000ab5801a7d398351b8be11c439e05c5b3259aec9b".into(),
            ],
            data: word.to_vec(),
            address: "0xa0b86991c6218b36c1d19d4a2e9eb0ce3606eb48".into(),
            raw_receipt: None,
        }
    }

    /// The fingerprint of a log is its topic0.
    #[test]
    fn evm_decoder_fingerprint() {
        let decoder = EvmDecoder::new();
        let log = erc20_transfer_raw();
        let fp = decoder.fingerprint(&log);
        assert_eq!(fp.as_hex(), TRANSFER_TOPIC0);
    }
}