foundry_fork_db/cache.rs

//! Cache related abstraction
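//!
//! # Example
//!
//! A minimal sketch of wiring up a [BlockchainDb] backed by a JSON cache file; the import path,
//! RPC URL and cache location below are illustrative:
//!
//! ```ignore
//! use foundry_fork_db::cache::{BlockchainDb, BlockchainDbMeta};
//! use revm::context::BlockEnv;
//! use std::path::PathBuf;
//!
//! // Identify the fork by its block environment and the RPC host it was created from.
//! let meta = BlockchainDbMeta::new(BlockEnv::default(), "https://eth-mainnet.example.com".to_string());
//!
//! // Backed by a JSON cache file; pass `None` for a purely in-memory (transient) db.
//! let db = BlockchainDb::new(meta, Some(PathBuf::from("cache/mainnet.json")));
//!
//! // Later, persist anything that was fetched while forking.
//! db.cache().flush();
//! ```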
use alloy_consensus::BlockHeader;
use alloy_primitives::{Address, B256, U256};
use alloy_provider::network::TransactionResponse;
use parking_lot::RwLock;
use revm::{
    context::BlockEnv,
    context_interface::block::BlobExcessGasAndPrice,
    primitives::{
        map::{AddressHashMap, HashMap},
        KECCAK_EMPTY,
    },
    state::{Account, AccountInfo, AccountStatus},
    DatabaseCommit,
};
use serde::{ser::SerializeMap, Deserialize, Deserializer, Serialize, Serializer};
use std::{
    collections::BTreeSet,
    fs,
    io::{BufWriter, Write},
    path::{Path, PathBuf},
    sync::Arc,
};
use url::Url;

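/// Storage of a single account, mapping storage slot index to the stored value.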
pub type StorageInfo = HashMap<U256, U256>;

/// A shareable Block database
#[derive(Clone, Debug)]
pub struct BlockchainDb {
    /// Contains all the data
    db: Arc<MemDb>,
    /// metadata of the current config
    meta: Arc<RwLock<BlockchainDbMeta>>,
    /// the cache that can be flushed
    cache: Arc<JsonBlockCacheDB>,
}

impl BlockchainDb {
    /// Creates a new instance of the [BlockchainDb].
    ///
    /// If a `cache_path` is provided, it attempts to load a previously stored [JsonBlockCacheData]
    /// and will try to use the cached entries it holds.
    ///
    /// This will return a new and empty [MemDb] if
    ///   - `cache_path` is `None`
    ///   - the file `cache_path` points to does not exist
    ///   - the file contains malformed data or couldn't be read
    ///   - the provided `meta` differs from the [BlockchainDbMeta] stored on disk
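    ///
    /// # Example
    ///
    /// A minimal sketch; the cache path is illustrative:
    ///
    /// ```ignore
    /// use foundry_fork_db::cache::{BlockchainDb, BlockchainDbMeta};
    /// use std::path::PathBuf;
    ///
    /// // Tries to reuse `cache.json` if it exists and its meta matches.
    /// let db = BlockchainDb::new(BlockchainDbMeta::default(), Some(PathBuf::from("cache.json")));
    ///
    /// // No cache path: purely in-memory, nothing is ever flushed to disk.
    /// let transient = BlockchainDb::new(BlockchainDbMeta::default(), None);
    /// assert!(transient.cache().is_transient());
    /// ```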
    pub fn new(meta: BlockchainDbMeta, cache_path: Option<PathBuf>) -> Self {
        Self::new_db(meta, cache_path, false)
    }

    /// Creates a new instance of the [BlockchainDb], skipping the metadata check when an existing
    /// cache is loaded. This is useful for offline-start mode when we don't want to fetch the
    /// metadata of `block`.
    ///
    /// If a `cache_path` is provided, it attempts to load a previously stored [JsonBlockCacheData]
    /// and will try to use the cached entries it holds.
    ///
    /// This will return a new and empty [MemDb] if
    ///   - `cache_path` is `None`
    ///   - the file `cache_path` points to does not exist
    ///   - the file contains malformed data or couldn't be read
    pub fn new_skip_check(meta: BlockchainDbMeta, cache_path: Option<PathBuf>) -> Self {
        Self::new_db(meta, cache_path, true)
    }

    fn new_db(meta: BlockchainDbMeta, cache_path: Option<PathBuf>, skip_check: bool) -> Self {
        trace!(target: "forge::cache", cache=?cache_path, "initialising blockchain db");
        // read cache and check if metadata matches
        let cache = cache_path
            .as_ref()
            .and_then(|p| {
                JsonBlockCacheDB::load(p).ok().filter(|cache| {
                    if skip_check {
                        return true;
                    }
                    let mut existing = cache.meta().write();
                    existing.hosts.extend(meta.hosts.clone());
                    if meta != *existing {
                        warn!(target: "cache", "non-matching block metadata");
                        false
                    } else {
                        true
                    }
                })
            })
            .unwrap_or_else(|| JsonBlockCacheDB::new(Arc::new(RwLock::new(meta)), cache_path));

        Self { db: Arc::clone(cache.db()), meta: Arc::clone(cache.meta()), cache: Arc::new(cache) }
    }

    /// Returns the map that holds the account-related info
    pub fn accounts(&self) -> &RwLock<AddressHashMap<AccountInfo>> {
        &self.db.accounts
    }

    /// Returns the map that holds the storage-related info
    pub fn storage(&self) -> &RwLock<AddressHashMap<StorageInfo>> {
        &self.db.storage
    }

    /// Returns the map that holds all the block hashes
    pub fn block_hashes(&self) -> &RwLock<HashMap<U256, B256>> {
        &self.db.block_hashes
    }

    /// Returns the [BlockchainDbMeta] of this db
    pub const fn meta(&self) -> &Arc<RwLock<BlockchainDbMeta>> {
        &self.meta
    }

    /// Returns the inner cache
    pub const fn cache(&self) -> &Arc<JsonBlockCacheDB> {
        &self.cache
    }

    /// Returns the underlying [MemDb]
    pub const fn db(&self) -> &Arc<MemDb> {
        &self.db
    }
}

/// Relevant identifying markers in the context of [BlockchainDb]
#[derive(Clone, Debug, Eq, Serialize, Default)]
pub struct BlockchainDbMeta {
    /// The block environment
    pub block_env: BlockEnv,
    /// All the hosts used to connect to
    pub hosts: BTreeSet<String>,
}

impl BlockchainDbMeta {
    /// Creates a new instance, inferring the host from the provided `url`.
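    ///
    /// # Example
    ///
    /// A small sketch; the URL is illustrative:
    ///
    /// ```ignore
    /// use foundry_fork_db::cache::BlockchainDbMeta;
    /// use revm::context::BlockEnv;
    ///
    /// let meta = BlockchainDbMeta::new(
    ///     BlockEnv::default(),
    ///     "https://eth-mainnet.example.com/v2/some-key".to_string(),
    /// );
    /// // Only the host is stored, so different API keys or schemes map to the same entry.
    /// assert!(meta.hosts.contains("eth-mainnet.example.com"));
    /// ```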
    pub fn new(block_env: BlockEnv, url: String) -> Self {
        let host = Url::parse(&url)
            .ok()
            .and_then(|url| url.host().map(|host| host.to_string()))
            .unwrap_or(url);

        Self { block_env, hosts: BTreeSet::from([host]) }
    }

    /// Sets the [BlockEnv] of this instance using the provided [alloy_rpc_types::Block]
    pub fn with_block<T: TransactionResponse, H: BlockHeader>(
        mut self,
        block: &alloy_rpc_types::Block<T, H>,
    ) -> Self {
        self.block_env = BlockEnv {
            number: block.header.number(),
            beneficiary: block.header.beneficiary(),
            timestamp: block.header.timestamp(),
            difficulty: U256::from(block.header.difficulty()),
            basefee: block.header.base_fee_per_gas().unwrap_or_default(),
            gas_limit: block.header.gas_limit(),
            prevrandao: block.header.mix_hash(),
            blob_excess_gas_and_price: Some(BlobExcessGasAndPrice::new(
                block.header.excess_blob_gas().unwrap_or_default(),
                false,
            )),
        };

        self
    }

    /// Infers the host from the provided URL and adds it to the set of hosts.
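    ///
    /// # Example
    ///
    /// A short sketch with illustrative endpoints:
    ///
    /// ```ignore
    /// use foundry_fork_db::cache::BlockchainDbMeta;
    ///
    /// let meta = BlockchainDbMeta::default()
    ///     .with_url("https://eth-mainnet.example.com")
    ///     .with_url("wss://eth-mainnet.example.com");
    /// // Both endpoints resolve to the same host, so only one entry is stored.
    /// assert_eq!(meta.hosts.len(), 1);
    /// ```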
    pub fn with_url(mut self, url: &str) -> Self {
        let host = Url::parse(url)
            .ok()
            .and_then(|url| url.host().map(|host| host.to_string()))
            .unwrap_or(url.to_string());
        self.hosts.insert(host);
        self
    }

    /// Sets the [BlockEnv] of this instance
    pub fn set_block_env(&mut self, block_env: revm::context::BlockEnv) {
        self.block_env = block_env;
    }
}

// Ignore hosts so the cache is not invalidated when different endpoints are used, as is commonly
// the case for http vs ws endpoints.
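// For example, a meta created from `https://eth-mainnet.example.com` and one created from
// `wss://eth-mainnet.example.com` (illustrative endpoints) compare equal as long as their
// `block_env` matches, so an existing cache file is reused for both.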
impl PartialEq for BlockchainDbMeta {
    fn eq(&self, other: &Self) -> bool {
        self.block_env == other.block_env
    }
}

impl<'de> Deserialize<'de> for BlockchainDbMeta {
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: Deserializer<'de>,
    {
        /// A backwards compatible representation of [revm::context::BlockEnv]
        ///
        /// This prevents deserialization errors of cache files caused by breaking changes to the
        /// default [revm::context::BlockEnv], for example enabling an optional feature.
        /// By hand-rolling the deserialize impl we can prevent cache file issues.
        struct BlockEnvBackwardsCompat {
            inner: revm::context::BlockEnv,
        }

        impl<'de> Deserialize<'de> for BlockEnvBackwardsCompat {
            fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
            where
                D: Deserializer<'de>,
            {
                let mut value = serde_json::Value::deserialize(deserializer)?;

                // fill in any fields missing from older cache files with their current defaults
                if let Some(obj) = value.as_object_mut() {
                    let default_value =
                        serde_json::to_value(revm::context::BlockEnv::default()).unwrap();
                    for (key, value) in default_value.as_object().unwrap() {
                        if !obj.contains_key(key) {
                            obj.insert(key.to_string(), value.clone());
                        }
                    }
                }

                let block_env: revm::context::BlockEnv =
                    serde_json::from_value(value).map_err(serde::de::Error::custom)?;
                Ok(Self { inner: block_env })
            }
        }

        // custom deserialize impl to not break existing cache files
        #[derive(Deserialize)]
        struct Meta {
            block_env: BlockEnvBackwardsCompat,
            /// all the hosts used to connect to
            #[serde(alias = "host")]
            hosts: Hosts,
        }

        #[derive(Deserialize)]
        #[serde(untagged)]
        enum Hosts {
            Multi(BTreeSet<String>),
            Single(String),
        }

        let Meta { block_env, hosts } = Meta::deserialize(deserializer)?;
        Ok(Self {
            block_env: block_env.inner,
            hosts: match hosts {
                Hosts::Multi(hosts) => hosts,
                Hosts::Single(host) => BTreeSet::from([host]),
            },
        })
    }
}

/// In-memory cache containing all fetched accounts and storage slots
/// and their values from RPC
#[derive(Debug, Default)]
pub struct MemDb {
    /// Account-related data
    pub accounts: RwLock<AddressHashMap<AccountInfo>>,
    /// Storage-related data
    pub storage: RwLock<AddressHashMap<StorageInfo>>,
    /// All retrieved block hashes
    pub block_hashes: RwLock<HashMap<U256, B256>>,
}

impl MemDb {
    /// Clears all data stored in this db
    pub fn clear(&self) {
        self.accounts.write().clear();
        self.storage.write().clear();
        self.block_hashes.write().clear();
    }

    /// Inserts the account, replacing it if it already exists
    pub fn do_insert_account(&self, address: Address, account: AccountInfo) {
        self.accounts.write().insert(address, account);
    }

    /// The implementation of [DatabaseCommit::commit()]
    pub fn do_commit(&self, changes: HashMap<Address, Account>) {
        let mut storage = self.storage.write();
        let mut accounts = self.accounts.write();
        for (add, mut acc) in changes {
            if acc.is_empty() || acc.is_selfdestructed() {
                accounts.remove(&add);
                storage.remove(&add);
            } else {
                // insert account
                if let Some(code_hash) = acc
                    .info
                    .code
                    .as_ref()
                    .filter(|code| !code.is_empty())
                    .map(|code| code.hash_slow())
                {
                    acc.info.code_hash = code_hash;
                } else if acc.info.code_hash.is_zero() {
                    acc.info.code_hash = KECCAK_EMPTY;
                }
                accounts.insert(add, acc.info);

                let acc_storage = storage.entry(add).or_default();
                if acc.status.contains(AccountStatus::Created) {
                    acc_storage.clear();
                }
                for (index, value) in acc.storage {
                    if value.present_value().is_zero() {
                        acc_storage.remove(&index);
                    } else {
                        acc_storage.insert(index, value.present_value());
                    }
                }
                if acc_storage.is_empty() {
                    storage.remove(&add);
                }
            }
        }
    }
}

impl Clone for MemDb {
    fn clone(&self) -> Self {
        Self {
            storage: RwLock::new(self.storage.read().clone()),
            accounts: RwLock::new(self.accounts.read().clone()),
            block_hashes: RwLock::new(self.block_hashes.read().clone()),
        }
    }
}

impl DatabaseCommit for MemDb {
    fn commit(&mut self, changes: HashMap<Address, Account>) {
        self.do_commit(changes)
    }
}

/// A DB that stores the cached content in a JSON file
#[derive(Debug)]
pub struct JsonBlockCacheDB {
    /// Where this cache file is stored.
    ///
    /// If this is [None], caching is disabled.
    cache_path: Option<PathBuf>,
    /// Object that's stored in a JSON file
    data: JsonBlockCacheData,
}

impl JsonBlockCacheDB {
    /// Creates a new instance.
    fn new(meta: Arc<RwLock<BlockchainDbMeta>>, cache_path: Option<PathBuf>) -> Self {
        Self { cache_path, data: JsonBlockCacheData { meta, data: Arc::new(Default::default()) } }
    }

    /// Loads the contents of the cache file at `path` and returns the read object
    ///
    /// # Errors
    /// This will fail if
    ///   - the `path` does not exist
    ///   - the format does not match [JsonBlockCacheData]
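    ///
    /// # Example
    ///
    /// A minimal sketch; the path is illustrative and must point to an existing cache file:
    ///
    /// ```ignore
    /// use foundry_fork_db::cache::JsonBlockCacheDB;
    ///
    /// let cache = JsonBlockCacheDB::load("cache/mainnet.json").expect("valid cache file");
    /// assert!(!cache.is_transient());
    /// ```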
    pub fn load(path: impl Into<PathBuf>) -> eyre::Result<Self> {
        let path = path.into();
        trace!(target: "cache", ?path, "reading json cache");
        let contents = std::fs::read_to_string(&path).map_err(|err| {
            warn!(?err, ?path, "Failed to read cache file");
            err
        })?;
        let data = serde_json::from_str(&contents).map_err(|err| {
            warn!(target: "cache", ?err, ?path, "Failed to deserialize cache data");
            err
        })?;
        Ok(Self { cache_path: Some(path), data })
    }

    /// Returns the [MemDb] it holds access to
    pub const fn db(&self) -> &Arc<MemDb> {
        &self.data.data
    }

    /// Metadata stored alongside the data
    pub const fn meta(&self) -> &Arc<RwLock<BlockchainDbMeta>> {
        &self.data.meta
    }

    /// Returns `true` if this is a transient cache and nothing will be flushed
    pub const fn is_transient(&self) -> bool {
        self.cache_path.is_none()
    }

    /// Flushes the DB to disk if caching is enabled.
    #[instrument(level = "warn", skip_all, fields(path = ?self.cache_path))]
    pub fn flush(&self) {
        let Some(path) = &self.cache_path else { return };
        self.flush_to(path.as_path());
    }

    /// Flushes the DB to a specific file
    pub fn flush_to(&self, path: &Path) {
        trace!(target: "cache", "saving json cache");

        if let Some(parent) = path.parent() {
            let _ = fs::create_dir_all(parent);
        }

        let file = match fs::File::create(path) {
            Ok(file) => file,
            Err(e) => return warn!(target: "cache", %e, "Failed to open json cache for writing"),
        };

        let mut writer = BufWriter::new(file);
        if let Err(e) = serde_json::to_writer(&mut writer, &self.data) {
            return warn!(target: "cache", %e, "Failed to write to json cache");
        }
        if let Err(e) = writer.flush() {
            return warn!(target: "cache", %e, "Failed to flush to json cache");
        }

        trace!(target: "cache", "saved json cache");
    }

    /// Returns the cache path.
    pub fn cache_path(&self) -> Option<&Path> {
        self.cache_path.as_deref()
    }
}

/// The data the [JsonBlockCacheDB] can read and flush
///
/// This will be (de)serialized as a JSON object with the keys:
/// `["meta", "accounts", "storage", "block_hashes"]`
#[derive(Debug)]
pub struct JsonBlockCacheData {
    pub meta: Arc<RwLock<BlockchainDbMeta>>,
    pub data: Arc<MemDb>,
}

impl Serialize for JsonBlockCacheData {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        let mut map = serializer.serialize_map(Some(4))?;

        map.serialize_entry("meta", &*self.meta.read())?;
        map.serialize_entry("accounts", &*self.data.accounts.read())?;
        map.serialize_entry("storage", &*self.data.storage.read())?;
        map.serialize_entry("block_hashes", &*self.data.block_hashes.read())?;

        map.end()
    }
}

impl<'de> Deserialize<'de> for JsonBlockCacheData {
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: Deserializer<'de>,
    {
        #[derive(Deserialize)]
        struct Data {
            meta: BlockchainDbMeta,
            accounts: AddressHashMap<AccountInfo>,
            storage: AddressHashMap<HashMap<U256, U256>>,
            block_hashes: HashMap<U256, B256>,
        }

        let Data { meta, accounts, storage, block_hashes } = Data::deserialize(deserializer)?;

        Ok(Self {
            meta: Arc::new(RwLock::new(meta)),
            data: Arc::new(MemDb {
                accounts: RwLock::new(accounts),
                storage: RwLock::new(storage),
                block_hashes: RwLock::new(block_hashes),
            }),
        })
    }
}

/// A type that flushes a `JsonBlockCacheDB` on drop
///
/// This type intentionally does not implement `Clone` since it's intended that there's only one
/// instance that will flush the cache.
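///
/// # Example
///
/// A sketch of flushing on drop, assuming `db` is an existing [BlockchainDb] with a cache path:
///
/// ```ignore
/// use foundry_fork_db::cache::FlushJsonBlockCacheDB;
/// use std::sync::Arc;
///
/// {
///     let _flusher = FlushJsonBlockCacheDB(Arc::clone(db.cache()));
///     // ... the db is populated while forking ...
/// } // dropped here: the JSON cache is written to disk
/// ```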
#[derive(Debug)]
pub struct FlushJsonBlockCacheDB(pub Arc<JsonBlockCacheDB>);

impl Drop for FlushJsonBlockCacheDB {
    fn drop(&mut self) {
        trace!(target: "fork::cache", "flushing cache");
        self.0.flush();
        trace!(target: "fork::cache", "flushed cache");
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn can_deserialize_cache() {
        let s = r#"{
    "meta": {
        "cfg_env": {
            "chain_id": 1337,
            "perf_analyse_created_bytecodes": "Analyse",
            "limit_contract_code_size": 18446744073709551615,
            "memory_limit": 4294967295,
            "disable_block_gas_limit": false,
            "disable_eip3607": false,
            "disable_base_fee": false
        },
        "block_env": {
            "number": 15547871,
            "coinbase": "0x0000000000000000000000000000000000000000",
            "timestamp": 1663351871,
            "difficulty": "0x0",
            "basefee": 12448539171,
            "gas_limit": 30000000,
            "prevrandao": "0x0000000000000000000000000000000000000000000000000000000000000000"
        },
        "hosts": [
            "eth-mainnet.alchemyapi.io"
        ]
    },
    "accounts": {
        "0xb8ffc3cd6e7cf5a098a1c92f48009765b24088dc": {
            "balance": "0x0",
            "nonce": 10,
            "code_hash": "0x3ac64c95eedf82e5d821696a12daac0e1b22c8ee18a9fd688b00cfaf14550aad",
            "code": {
                "LegacyAnalyzed": {
                    "bytecode": "0x00",
                    "original_len": 0,
                    "jump_table": {
                      "order": "bitvec::order::Lsb0",
                      "head": {
                        "width": 8,
                        "index": 0
                      },
                      "bits": 1,
                      "data": [0]
                    }
                }
            }
        }
    },
    "storage": {
        "0xa354f35829ae975e850e23e9615b11da1b3dc4de": {
            "0x290decd9548b62a8d60345a988386fc84ba6bc95484008f6362f93160ef3e564": "0x5553444320795661756c74000000000000000000000000000000000000000000",
            "0x10": "0x37fd60ff8346",
            "0x290decd9548b62a8d60345a988386fc84ba6bc95484008f6362f93160ef3e563": "0xb",
            "0x6": "0xa0b86991c6218b36c1d19d4a2e9eb0ce3606eb48",
            "0x5": "0x36ff5b93162e",
            "0x14": "0x29d635a8e000",
            "0x11": "0x63224c73",
            "0x2": "0x6"
        }
    },
    "block_hashes": {
        "0xed3deb": "0xbf7be3174b261ea3c377b6aba4a1e05d5fae7eee7aab5691087c20cf353e9877",
        "0xed3de9": "0xba1c3648e0aee193e7d00dffe4e9a5e420016b4880455641085a4731c1d32eef",
        "0xed3de8": "0x61d1491c03a9295fb13395cca18b17b4fa5c64c6b8e56ee9cc0a70c3f6cf9855",
        "0xed3de7": "0xb54560b5baeccd18350d56a3bee4035432294dc9d2b7e02f157813e1dee3a0be",
        "0xed3dea": "0x816f124480b9661e1631c6ec9ee39350bda79f0cbfc911f925838d88e3d02e4b"
    }
}"#;

        let cache: JsonBlockCacheData = serde_json::from_str(s).unwrap();
        assert_eq!(cache.data.accounts.read().len(), 1);
        assert_eq!(cache.data.storage.read().len(), 1);
        assert_eq!(cache.data.block_hashes.read().len(), 5);

        let _s = serde_json::to_string(&cache).unwrap();
    }

    #[test]
    fn can_deserialize_cache_post_4844() {
        let s = r#"{
    "meta": {
        "cfg_env": {
            "chain_id": 1,
            "kzg_settings": "Default",
            "perf_analyse_created_bytecodes": "Analyse",
            "limit_contract_code_size": 18446744073709551615,
            "memory_limit": 134217728,
            "disable_block_gas_limit": false,
            "disable_eip3607": true,
            "disable_base_fee": false,
            "optimism": false
        },
        "block_env": {
            "number": 18651580,
            "coinbase": "0x4838b106fce9647bdf1e7877bf73ce8b0bad5f97",
            "timestamp": 1700950019,
            "gas_limit": 30000000,
            "basefee": 26886078239,
            "difficulty": "0xc6b1a299886016dea3865689f8393b9bf4d8f4fe8c0ad25f0058b3569297c057",
            "prevrandao": "0xc6b1a299886016dea3865689f8393b9bf4d8f4fe8c0ad25f0058b3569297c057",
            "blob_excess_gas_and_price": {
                "excess_blob_gas": 0,
                "blob_gasprice": 1
            }
        },
        "hosts": [
            "eth-mainnet.alchemyapi.io"
        ]
    },
    "accounts": {
        "0x4838b106fce9647bdf1e7877bf73ce8b0bad5f97": {
            "balance": "0x8e0c373cfcdfd0eb",
            "nonce": 128912,
            "code_hash": "0xc5d2460186f7233c927e7db2dcc703c0e500b653ca82273b7bfad8045d85a470",
            "code": {
                "LegacyAnalyzed": {
                    "bytecode": "0x00",
                    "original_len": 0,
                    "jump_table": {
                      "order": "bitvec::order::Lsb0",
                      "head": {
                        "width": 8,
                        "index": 0
                      },
                      "bits": 1,
                      "data": [0]
                    }
                }
            }
        }
    },
    "storage": {},
    "block_hashes": {}
}"#;

        let cache: JsonBlockCacheData = serde_json::from_str(s).unwrap();
        assert_eq!(cache.data.accounts.read().len(), 1);

        let _s = serde_json::to_string(&cache).unwrap();
    }

    #[test]
    fn can_return_cache_path_if_set() {
        // set
        let cache_db = JsonBlockCacheDB::new(
            Arc::new(RwLock::new(BlockchainDbMeta::default())),
            Some(PathBuf::from("/tmp/foo")),
        );
        assert_eq!(Some(Path::new("/tmp/foo")), cache_db.cache_path());

        // unset
        let cache_db =
            JsonBlockCacheDB::new(Arc::new(RwLock::new(BlockchainDbMeta::default())), None);
        assert_eq!(None, cache_db.cache_path());
    }
}