// midnight_storage/delta_tracking/mod.rs
// This file is part of midnight-ledger.
// Copyright (C) 2025 Midnight Foundation
// SPDX-License-Identifier: Apache-2.0
// Licensed under the Apache License, Version 2.0 (the "License");
// You may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Delta tracking module for write and delete costing
//!
//! This module provides the `RcMap` data structure and associated functions
//! for implementing write and delete costing in the Midnight ledger.

mod rcmap;

pub use rcmap::{ChildRef, RcMap};
use serialize::Serializable;

use crate::arena::ArenaKey;
use crate::db::DB;
use base_crypto::cost_model::{CostDuration, RunningCost};
use std::collections::BTreeSet;

/// Result of write and delete cost computation
///
/// Produced by `initial_write_delete_costs` and
/// `incremental_write_delete_costs`; bundles the byte/node deltas, the CPU
/// cost of processing them, and the resulting charged-keys map.
pub struct WriteDeleteResults<D: DB> {
    /// Total bytes that would be written to storage
    pub bytes_written: u64,
    /// Total bytes that would be deleted from storage
    pub bytes_deleted: u64,
    /// The amount of nodes created
    pub nodes_written: u64,
    /// The amount of nodes deleted
    pub nodes_deleted: u64,
    /// The CPU cost of the write/delete processing
    pub processing_cost: RunningCost,
    /// Updated charged keys map (`K1`) after write and delete computation
    pub updated_charged_keys: RcMap<D>,
}
44
45/// Return costed initial `RcMap` (`K0`) for a root set `r0`.
46///
47/// For costing initial state of new contract.
48///
49/// WARNING: this requires the keys in `r0` to be in the back-end; see similar
50/// warning in `incremental_write_delete_costs` for more details.
51pub fn initial_write_delete_costs<D: DB>(
52    r0: &BTreeSet<ArenaKey<D::Hasher>>,
53    cpu_cost: impl Fn(u64, u64) -> RunningCost,
54) -> WriteDeleteResults<D> {
55    let rcmap = RcMap::default();
56    let keys_reachable_from_r0 = get_writes(&rcmap, r0);
57    let keys_removed = BTreeSet::new();
58    let k0 = update_rcmap(&rcmap, &keys_reachable_from_r0);
59    WriteDeleteResults::new(keys_reachable_from_r0, keys_removed, k0, cpu_cost)
60}
61
62/// Compute write and delete costs from old charged keys `k0` and new root set `r1`,
63/// returning costed updated charged keys (`K1`).
64///
65/// For costing a call to an existing contract.
66///
67/// This implements the complete costing algorithm from the spec.
68///
69/// WARNING: this requires the keys in `r1` to be in the back-end, which in turn
70/// requires them to be `persist()`ed or currently loaded in `Sp`s. The assumption
71/// is that callers of this function will do so while `Sp`s are in scope for the
72/// `r1` keys, which happens naturally when calling this function on keys taken
73/// from the output `StateValue` of the VM.
74pub fn incremental_write_delete_costs<D: DB>(
75    k0: &RcMap<D>,
76    r1: &BTreeSet<ArenaKey<D::Hasher>>,
77    cpu_cost: impl Fn(u64, u64) -> RunningCost,
78    gc_limit: impl FnOnce(RunningCost) -> usize,
79) -> WriteDeleteResults<D> {
80    // Step 1: Find keys that need to be written (reachable from R1 but not in K0)
81    let keys_added = get_writes(k0, r1);
82    let added_cost = cpu_cost(keys_added.len() as u64, 0);
83    // Step 2: Update reference counts by adding the new keys
84    let k = update_rcmap(k0, &keys_added);
85    // Step 3: Garbage collect unreachable keys
86    let (k1, keys_removed) = gc_rcmap(&k, r1, gc_limit(added_cost));
87    WriteDeleteResults::new(keys_added, keys_removed, k1, cpu_cost)
88}
89
90/// Compute total bytes from a set of keys by summing their node sizes.
91fn compute_bytes_from_keys<D: DB>(keys: &BTreeSet<ArenaKey<D::Hasher>>) -> u64 {
92    let arena = &crate::storage::default_storage::<D>().arena;
93    arena.with_backend(|backend| {
94        keys.iter()
95            .map(|key| {
96                match key {
97                    ArenaKey::Ref(key) => {
98                        backend
99                        .get(key)
100                        // WARNING: this requires the keys to be in the backend,
101                        // which in turn requires them to be `persist()`ed or
102                        // currently loaded in sps. We ensure this by storing refs
103                        // to charged keys in the RcMap, which itself gets persisted
104                        // as part of the ContractState's ChargedState.
105                        .expect("key should exist in arena when computing bytes")
106                        .size() as u64
107                        // Overhead of storing the refcount itself
108                        + 32 + 4
109                    }
110                    // Direct children *must* be in rc_0, where they are both key and value
111                    ArenaKey::Direct(_) => key.serialized_size() as u64 * 2,
112                }
113            })
114            .sum()
115    })
116}
117
118impl<D: DB> WriteDeleteResults<D> {
119    /// Compute `WriteDeleteResults` for new `RcMap` and key deltas.
120    fn new(
121        keys_added: BTreeSet<ArenaKey<D::Hasher>>,
122        keys_removed: BTreeSet<ArenaKey<D::Hasher>>,
123        new_charged_keys: RcMap<D>,
124        cpu_cost: impl Fn(u64, u64) -> RunningCost,
125    ) -> Self {
126        let nodes_written = keys_added.len() as u64;
127        let nodes_deleted = keys_removed.len() as u64;
128        Self {
129            bytes_written: compute_bytes_from_keys::<D>(&keys_added),
130            bytes_deleted: compute_bytes_from_keys::<D>(&keys_removed),
131            nodes_written,
132            nodes_deleted,
133            processing_cost: cpu_cost(nodes_written, nodes_deleted),
134            updated_charged_keys: new_charged_keys,
135        }
136    }
137
138    /// Get `RunningCost` of these results.
139    pub fn running_cost(&self) -> RunningCost {
140        RunningCost {
141            read_time: CostDuration::ZERO,
142            compute_time: CostDuration::ZERO,
143            bytes_written: self.bytes_written,
144            bytes_deleted: self.bytes_deleted,
145        } + self.processing_cost
146    }
147}
148
149/// Compute keys reachable from `roots` that are not currently charged in the
150/// `RcMap`.
151///
152/// Assumes: `rcmap` is child closed.
153///
154/// Ensures: the return value union `rcmap` is child closed, and contains all
155/// keys in `roots`.
156pub fn get_writes<D: DB>(
157    rcmap: &RcMap<D>,
158    roots: &BTreeSet<ArenaKey<D::Hasher>>,
159) -> BTreeSet<ArenaKey<D::Hasher>> {
160    let arena = &crate::storage::default_storage::<D>().arena;
161    let mut queue: Vec<ArenaKey<D::Hasher>> = roots.iter().cloned().collect();
162    queue.sort();
163    let mut keys_added = BTreeSet::new();
164
165    while let Some(key) = queue.pop() {
166        if !rcmap.contains(&key) && !keys_added.contains(&key) {
167            match &key {
168                ArenaKey::Ref(key) => {
169                    let children = arena
170                        .children(key)
171                        .expect("children for write update should be loadable");
172                    queue.extend(
173                        children
174                            .iter()
175                            .flat_map(ArenaKey::refs)
176                            .map(|r| ArenaKey::Ref(r.clone())),
177                    );
178                }
179                ArenaKey::Direct(_) => {
180                    queue.extend(key.refs().into_iter().map(|r| ArenaKey::Ref(r.clone())))
181                }
182            }
183            keys_added.insert(key);
184        }
185    }
186    keys_added
187}
188
189/// Update an `RcMap` by adding reference counts for the provided keys and all
190/// their children.  Returns a new `RcMap` with the updated reference counts.
191///
192/// Assumes:
193/// - `rcmap` union `keys_added` is child closed.
194/// - `rcmap` has internally accurate reference counts.
195///
196/// Ensures: the returned `RcMap` is child closed, and has internally accurate
197/// reference counts.
198#[must_use]
199pub fn update_rcmap<D: DB>(
200    rcmap: &RcMap<D>,
201    keys_added: &BTreeSet<ArenaKey<D::Hasher>>,
202) -> RcMap<D> {
203    let arena = &crate::storage::default_storage::<D>().arena;
204    let mut rcmap = rcmap.clone();
205
206    // Count the new refs locally first to minimize the expensive-ish rcmap increments
207    let mut inc_map = keys_added
208        .iter()
209        .map(|k| (k.clone(), 0))
210        .collect::<std::collections::BTreeMap<_, _>>();
211    // Initialize all new keys with rc = 0
212    // Update reference counts for all edges from new keys
213    for key in keys_added {
214        match key {
215            ArenaKey::Ref(key) => {
216                let children = arena.children(key).expect("children should be loadable");
217                for child in children
218                    .iter()
219                    .flat_map(ArenaKey::refs)
220                    .map(|r| ArenaKey::Ref(r.clone()))
221                {
222                    *inc_map.entry(child).or_default() += 1;
223                }
224            }
225            ArenaKey::Direct(_) => {
226                for child in key.refs().into_iter().map(|r| ArenaKey::Ref(r.clone())) {
227                    *inc_map.entry(child).or_default() += 1;
228                }
229            }
230        }
231    }
232    let mut inc_vec = inc_map.into_iter().collect::<Vec<_>>();
233    inc_vec.sort();
234    for (k, by) in inc_vec.into_iter() {
235        match &k {
236            ArenaKey::Ref(r) => {
237                let old_rc = rcmap.get_rc(&k).unwrap_or(0);
238                rcmap = rcmap.modify_rc(r, old_rc + by);
239            }
240            ArenaKey::Direct(_) => rcmap = rcmap.ins_root(k),
241        }
242    }
243
244    rcmap
245}
246
/// Perform garbage collection by removing keys with zero reference counts that
/// are not in `roots`.
///
/// The cascade starts from keys the map already records as unreachable;
/// removing a key decrements its children, which may in turn become eligible
/// for collection. At most `step_limit` keys are processed per call; GC can be
/// resumed on the returned map (see the resumption case in the tests below).
///
/// Returns a tuple of `(updated_rcmap, keys_removed)`.
#[must_use]
pub fn gc_rcmap<D: DB>(
    orig_rcmap: &RcMap<D>,
    roots: &BTreeSet<ArenaKey<D::Hasher>>,
    step_limit: usize,
) -> (RcMap<D>, BTreeSet<ArenaKey<D::Hasher>>) {
    let arena = &crate::storage::default_storage::<D>().arena;
    let mut rcmap = orig_rcmap.clone();
    let mut keys_removed = BTreeSet::new();
    let mut step = 0;
    // Seed the frontier with keys the map already knows are unreachable.
    let mut storage_queue = orig_rcmap.get_unreachable_keys_not_in(roots);
    // Invariant: keys in queue have rc == 0 and aren't in r1.
    let mut queue: Vec<ArenaKey<D::Hasher>> = Vec::new();
    // `rc_cache` snapshots each touched child's original refcount (read once);
    // `update_queue` accumulates pending decrements per child. Both are
    // applied in a batch after the traversal, to churn storage less.
    let mut rc_cache = std::collections::BTreeMap::new();
    let mut update_queue = std::collections::BTreeMap::new();

    // First, accumulate update information (to churn storage less)
    while let Some(key) = storage_queue.next().or_else(|| queue.pop()) {
        // Budget check happens before processing, so a partially-collected
        // graph can be finished by a later call with the returned map.
        if step >= step_limit {
            break;
        }
        step += 1;

        // Decrement reference counts of key's children
        let children_refs: Box<dyn Iterator<Item = _>> = match &key {
            ArenaKey::Ref(key) => Box::new(
                arena
                    .children(key)
                    .expect("children should be loadable")
                    .into_iter()
                    .flat_map(|c| {
                        c.refs()
                            .into_iter()
                            .cloned()
                            .collect::<Vec<_>>()
                            .into_iter()
                    }),
            ),
            ArenaKey::Direct(_) => Box::new(key.refs().into_iter().cloned()),
        };
        for child in children_refs.map(|r| ArenaKey::Ref(r.clone())) {
            let existing = rc_cache
                .entry(child.clone())
                .or_insert_with(|| rcmap.get_rc(&child).unwrap_or(0));
            let sub = update_queue.entry(child.clone()).or_default();
            *sub += 1;
            // If child's rc became 0 and it's not in roots, add it to the gc queue
            if *sub >= *existing && !roots.contains(&child) {
                queue.push(child.clone());
            }
        }
        keys_removed.insert(key);
    }

    // NOTE(review): `update_queue` is a BTreeMap, which iterates in sorted
    // key order already, so this sort appears to be a no-op kept for clarity.
    let mut update_vec = update_queue.into_iter().collect::<Vec<_>>();
    update_vec.sort();
    // Execute on the update information
    for (key, update) in update_vec.into_iter() {
        match &key {
            ArenaKey::Ref(r) => {
                let original = rc_cache
                    .get(&key)
                    .expect("must have cached decremented key");
                // `saturating_sub` guards against decrementing below zero.
                let updated = original.saturating_sub(update);
                rcmap = rcmap.modify_rc(r, updated);
            }
            ArenaKey::Direct(_) => rcmap = rcmap.rm_root(&key),
        }
    }

    let mut removed_vec = keys_removed.iter().collect::<Vec<_>>();
    removed_vec.sort();
    for key in removed_vec.into_iter() {
        // Remove key.
        //
        // WARNING: the order here is important, i.e. first decrementing
        // children, and then removing their parents. The reason is that the
        // RcMap only holds backend references to the keys with refcount zero,
        // where here in these high-level functions that use the RcMap we
        // maintain the invariant that RcMap is ref-count accurate for the roots
        // it was constructed from. So, if we remove a parent key from rc_0
        // before decrementing its children, its children could become
        // unreachable!
        //
        // NOTE: we don't have the same concerns in `get_writes` and
        // `update_rcmap`, since there we're concerned with increasing rcs
        // corresponding to nodes that are currently in the arena in existing
        // sps.
        rcmap = rcmap
            .remove_unreachable_key(key)
            .expect("keys in queue have rc == 0");
    }

    (rcmap, keys_removed)
}
346
347#[cfg(test)]
348mod tests {
349    use super::*;
350    use crate as storage;
351    use crate::arena::Sp;
352    use crate::db::DB;
353    use crate::storable::{Loader, SMALL_OBJECT_LIMIT};
354    use crate::storage::set_default_storage;
355    use crate::{DefaultDB, Storable};
356    use derive_where::derive_where;
357    use serialize::Tagged;
358    use std::collections::BTreeMap;
359
    // Simple test node that can form arbitrary DAGs: DAG edges are the `Sp`
    // pointers held in `children`.
    #[derive(Storable, Debug, Hash)]
    #[derive_where(Clone, PartialEq, Eq)]
    #[storable(db = D)]
    #[tag = "test_node[v1]"]
    struct Node<D: DB = DefaultDB> {
        id: u64, // Encode (layer, node_id) as layer * 256 + node_id
        children: Vec<Sp<Node<D>, D>>,
        _data: [u8; SMALL_OBJECT_LIMIT], // In-lined data to keep nodes large enough to not be inlined themselves
    }
370
371    impl<D: DB> Node<D> {
372        fn new(id: (u8, u8), children: &[Sp<Node<D>, D>]) -> Sp<Node<D>, D> {
373            let encoded_id = (id.0 as u64) * 256 + (id.1 as u64);
374            Sp::new(Node {
375                id: encoded_id,
376                children: children.to_vec(),
377                _data: [0; SMALL_OBJECT_LIMIT],
378            })
379        }
380    }
381
    struct Dag<D: DB = DefaultDB> {
        // Arena key of every node, indexed by (layer, node_id).
        nodes: BTreeMap<(u8, u8), ArenaKey<D::Hasher>>,
        // Sps of the roots to keep them in backend.
        _roots: Vec<Sp<Node<D>, D>>,
    }
387
388    // Test DAG specified as adjacency list.
389    //
390    // This is pretty arbitrary, chosen for being "kind of complicated" and
391    // "doesn't have sideways or back edges". The tests are mostly agnostic to
392    // the structure here, altho they often assume that (0,1) and (0,2) are
393    // valid nodes with large descendant subgraphs.
394    #[allow(clippy::type_complexity)]
395    fn test_dag_adjacency() -> Vec<((u8, u8), Vec<(u8, u8)>)> {
396        vec![
397            // Layer 0: roots
398            ((0, 1), vec![(1, 1), (1, 2), (2, 1), (3, 1)]),
399            ((0, 2), vec![(1, 2), (1, 3)]),
400            // Layer 1
401            ((1, 1), vec![(2, 1), (2, 2)]),
402            ((1, 2), vec![(2, 2), (3, 1)]),
403            ((1, 3), vec![(2, 3), (3, 2)]),
404            // Layer 2
405            ((2, 1), vec![(3, 1), (3, 2), (3, 3)]),
406            ((2, 2), vec![(3, 2), (4, 1)]),
407            ((2, 3), vec![(3, 3), (3, 4)]),
408            // Layer 3
409            ((3, 1), vec![(4, 1), (4, 2)]),
410            ((3, 2), vec![(4, 1), (4, 2), (4, 3)]),
411            ((3, 3), vec![]),
412            ((3, 4), vec![(4, 3), (5, 1)]),
413            // Layer 4
414            ((4, 1), vec![(5, 1)]),
415            ((4, 2), vec![(5, 1), (5, 2)]),
416            ((4, 3), vec![(5, 2)]),
417            // Layer 5: leaves
418            ((5, 1), vec![]),
419            ((5, 2), vec![]),
420        ]
421    }
422
423    // Build the actual DAG from the adjacency list
424    fn build_test_dag<D: DB>() -> Dag<D> {
425        let adjacency = test_dag_adjacency();
426        let mut nodes: BTreeMap<(u8, u8), Sp<Node<D>, D>> = BTreeMap::new();
427
428        // Build bottom-up: iterate adjacency list in reverse order, because
429        // there are no back or sideways edges
430        for ((layer, id), children_ids) in adjacency.iter().rev() {
431            let node_id = (*layer, *id);
432            let children: Vec<_> = children_ids
433                .iter()
434                .map(|child_id| nodes[child_id].clone())
435                .collect();
436            nodes.insert(node_id, Node::new(node_id, &children));
437        }
438
439        // Convert to ArenaHash map for easier test access
440        let mut arena_nodes = BTreeMap::new();
441        for ((layer, id), node) in &nodes {
442            arena_nodes.insert((*layer, *id), node.child_repr.clone());
443        }
444
445        Dag {
446            nodes: arena_nodes,
447            _roots: vec![nodes[&(0, 1)].clone(), nodes[&(0, 2)].clone()],
448        }
449    }
450
451    // Compute all nodes reachable from given roots using adjacency list
452    fn compute_reachable_nodes(roots: &[(u8, u8)]) -> BTreeSet<(u8, u8)> {
453        let adjacency = test_dag_adjacency();
454        let mut reachable = BTreeSet::new();
455        let mut queue: Vec<(u8, u8)> = roots.to_vec();
456
457        while let Some(node_id) = queue.pop() {
458            if !reachable.insert(node_id) {
459                continue;
460            }
461            // Add children to queue
462            let children = adjacency
463                .iter()
464                .find(|(k, _)| *k == node_id)
465                .expect("nodes must be in adjacency")
466                .1
467                .clone();
468            queue.extend(children);
469        }
470
471        reachable
472    }
473
474    // Get subgraph reference counts for nodes reachable from roots
475    fn get_subgraph_rcs(roots: &[(u8, u8)]) -> BTreeMap<(u8, u8), u64> {
476        let adjacency = test_dag_adjacency();
477        let reachable = compute_reachable_nodes(roots);
478        let mut rcs = BTreeMap::new();
479
480        // Initialize all reachable nodes with rc=0
481        for node_id in &reachable {
482            rcs.insert(*node_id, 0);
483        }
484
485        // Count incoming edges from nodes in the subgraph
486        for (parent_id, children) in &adjacency {
487            if reachable.contains(parent_id) {
488                for child_id in children {
489                    let rc = rcs.get_mut(child_id).unwrap();
490                    *rc += 1;
491                }
492            }
493        }
494
495        rcs
496    }
497
498    // Convert node IDs to ArenaHashs using the DAG
499    fn to_keys<'a, I>(node_ids: I) -> BTreeSet<ArenaKey<crate::DefaultHasher>>
500    where
501        I: IntoIterator<Item = &'a (u8, u8)>,
502    {
503        let dag = build_test_dag::<DefaultDB>();
504        node_ids
505            .into_iter()
506            .map(|id| dag.nodes[id].clone())
507            .collect()
508    }
509
510    use super::rcmap::tests::get_rcmap_descendants;
511
    #[test]
    fn get_writes() {
        // Need this alive to be sure that nodes stay in backend.
        let _dag = build_test_dag::<DefaultDB>();

        // Test 1: Empty K0 should return all reachable nodes from R1
        let k0: RcMap<DefaultDB> = RcMap::default();
        let roots = [(0, 1)];
        let r1 = to_keys(roots.iter());

        let writes = super::get_writes(&k0, &r1.clone().into_iter().collect());
        let expected_reachable = compute_reachable_nodes(&roots);
        let expected_keys = to_keys(expected_reachable.iter());

        assert_eq!(
            writes,
            expected_keys.into_iter().collect(),
            "Write set should contain exactly the reachable nodes"
        );

        // Test 2: K0 contains bottom layers (3,4,5) - should truncate traversal
        // Layers 3, 4, 5
        let k0_node_ids: Vec<_> = test_dag_adjacency()
            .iter()
            .map(|((layer, id), _)| (*layer, *id))
            .filter(|(layer, _)| *layer >= 3 && *layer <= 5)
            .collect();
        let k0_keys = to_keys(k0_node_ids.iter());
        // Build a K0 that already charges for the bottom layers.
        let k0_writes =
            super::get_writes::<DefaultDB>(&RcMap::default(), &k0_keys.into_iter().collect());
        let k0: RcMap<DefaultDB> = super::update_rcmap(&RcMap::default(), &k0_writes);

        let writes = super::get_writes(&k0, &r1.into_iter().collect());

        // Compute what should be written: reachable from roots minus what's in K0
        let reachable_from_r1 = compute_reachable_nodes(&roots);
        let k0_set: BTreeSet<_> = k0_node_ids.iter().copied().collect();
        let expected_writes = &reachable_from_r1 - &k0_set;
        let expected_writes_keys = to_keys(expected_writes.iter());

        assert_eq!(
            writes,
            expected_writes_keys.into_iter().collect(),
            "Write set should exclude K0 nodes"
        );

        // Test 3: Multiple roots
        let multi_roots = [(0, 1), (0, 2)];
        let r1_multi = to_keys(multi_roots.iter());
        let writes_multi =
            super::get_writes::<DefaultDB>(&RcMap::default(), &r1_multi.into_iter().collect());
        let expected_multi = compute_reachable_nodes(&multi_roots);
        let expected_multi_keys = to_keys(expected_multi.iter());

        assert_eq!(
            writes_multi,
            expected_multi_keys.into_iter().collect(),
            "Multiple roots should give union of reachable sets"
        );
    }
572
    #[test]
    fn update_rcmap() {
        let dag = build_test_dag::<DefaultDB>();

        // Test updating empty RcMap with nodes reachable from specific roots
        let roots = [(0, 1)];
        let reachable = compute_reachable_nodes(&roots);

        let k0: RcMap<DefaultDB> = RcMap::default();
        let writes = to_keys(reachable.iter());

        let k1 = super::update_rcmap(&k0, &writes.into_iter().collect());

        // Compute expected reference counts based on adjacency
        let expected_rcs = get_subgraph_rcs(&roots);

        // Verify reference counts match expectations: each node's rc must
        // equal its number of in-edges within the charged subgraph.
        for (node_id, expected_rc) in expected_rcs {
            let actual_rc = k1.get_rc(&dag.nodes[&node_id].clone()).unwrap();
            assert_eq!(
                actual_rc, expected_rc,
                "Node {:?} should have rc={}, got {}",
                node_id, expected_rc, actual_rc
            );
        }
    }
599
    #[test]
    fn gc_rcmap() {
        let dag = build_test_dag::<DefaultDB>();

        // Build initial RcMap with nodes reachable from both root types
        let full_roots = [(0, 1), (0, 2)];
        let full_reachable = compute_reachable_nodes(&full_roots);
        let all_writes = to_keys(full_reachable.iter());
        let k0: RcMap<DefaultDB> =
            super::update_rcmap(&RcMap::default(), &all_writes.into_iter().collect());

        // Test GC with limited root set: only one root type
        let limited_roots = [(0, 1)];
        let roots = to_keys(limited_roots.iter());

        // Step limit large enough for the cascade to run to completion.
        let step_limit = 1000;
        let (k1, removed) = super::gc_rcmap(&k0, &roots.clone().into_iter().collect(), step_limit);

        // Compute what should remain vs be removed
        let kept_nodes = compute_reachable_nodes(&limited_roots);
        let expected_removed: BTreeSet<_> = &full_reachable - &kept_nodes;

        // Verify removed nodes
        assert_eq!(
            removed.len(),
            expected_removed.len(),
            "Should remove exactly the unreachable nodes"
        );

        for node_id in &expected_removed {
            assert!(
                removed.contains(&dag.nodes[node_id].clone()),
                "Node {:?} should be removed as unreachable",
                node_id
            );
            assert_eq!(
                k1.get_rc(&dag.nodes[node_id]),
                None,
                "Removed node {:?} should not have rc in new map",
                node_id
            );
        }

        // Verify kept nodes
        for node_id in &kept_nodes {
            assert!(
                !removed.contains(&dag.nodes[node_id].clone()),
                "Node {:?} should not be removed as it's reachable",
                node_id
            );
            assert!(
                k1.get_rc(&dag.nodes[node_id].clone()).is_some(),
                "Remaining node {:?} should have rc in new map",
                node_id
            );
        }

        // Test step limit: should stop GC early with limited steps
        let (k2, removed2) = super::gc_rcmap(&k0, &roots.clone().into_iter().collect(), 2);
        assert!(
            removed2.len() == 2,
            "With step_limit=2, should remove 2 nodes"
        );

        // Test resuming GC: a second call on the partially-collected map
        // should finish what the budget-limited call started.
        let (_k3, removed3) =
            super::gc_rcmap(&k2, &roots.into_iter().collect(), expected_removed.len());
        let total_removed: BTreeSet<_> = removed2.union(&removed3).cloned().collect();
        assert!(
            total_removed.len() == expected_removed.len(),
            "Resuming GC should make progress"
        );

        // Run gc one step at a time until no progress is possible
        let empty_roots = BTreeSet::new();
        let mut current_rcmap = k0.clone();
        let mut total_single_step_removed = BTreeSet::new();
        loop {
            let (new_rcmap, removed_single) = super::gc_rcmap(&current_rcmap, &empty_roots, 1);
            if removed_single.is_empty() {
                break; // No more progress
            }
            total_single_step_removed.extend(removed_single);
            current_rcmap = new_rcmap;
        }

        assert_eq!(
            total_single_step_removed.len(),
            full_reachable.len(),
            "Single-step GC should eventually remove all nodes with empty root set"
        );
    }
692
    // Test that GC operations don't crash when RcMap holds only references to data
    #[test]
    fn rcmap_survives_gc_with_only_references() {
        use crate::db::InMemoryDB;
        use crate::storage::WrappedDB;
        use std::collections::BTreeSet;

        // Fresh wrapped DB so this test's storage is isolated from the others.
        struct Tag;
        type W = WrappedDB<InMemoryDB, Tag>;
        set_default_storage(crate::Storage::<W>::default).unwrap();

        // Here rcmap has the only references to the underlying objects
        let rcmap: RcMap<W> = {
            // Build test DAG with isolated DB and create RcMap for full DAG
            let dag = build_test_dag::<W>();
            let full_roots = [(0, 1), (0, 2)];
            let all_reachable = compute_reachable_nodes(&full_roots);
            let all_writes: BTreeSet<_> = all_reachable
                .iter()
                .map(|id| dag.nodes[id].clone())
                .collect();
            super::update_rcmap(&RcMap::default(), &all_writes.into_iter().collect())
            // `dag` (and its root Sps) drop at the end of this block, so only
            // `rcmap` keeps the data alive afterwards.
        };

        // Run GC from empty root set - should GC everything but not crash
        let empty_roots = BTreeSet::new();
        let (_final_rcmap, _removed) = super::gc_rcmap(&rcmap, &empty_roots, 1000);

        // If we reach here without panics, RcMap successfully survived GC
        // with only references to the data
    }
724
    // Test that initial_write_delete_costs and incremental_write_delete_costs
    // work correctly for various randomly chosen contract state
    // transitions.
    #[test]
    fn write_delete_costs() {
        use rand::rngs::StdRng;
        use rand::{Rng, SeedableRng};
        use std::collections::{BTreeMap, BTreeSet};

        let dag = build_test_dag::<DefaultDB>();

        // Collect all node IDs from the DAG for random selection
        let all_node_ids: Vec<(u8, u8)> = dag.nodes.keys().cloned().collect();

        // Use a fixed seed for reproducibility
        let mut rng = StdRng::seed_from_u64(42);

        // Generate 100 random root sets with varying sizes
        let mut root_sets = Vec::new();
        for _ in 0..100 {
            let root_set_size = {
                // Size distribution:
                // - 40% small (0-3 nodes)
                // - 40% medium (4-8 nodes)
                // - 15% large (9-15 nodes)
                // - 5% very large (16-25 nodes)
                let p = rng.gen_range(0..100);
                if p < 40 {
                    rng.gen_range(0..=3)
                } else if p < 80 {
                    rng.gen_range(4..=8)
                } else if p < 95 {
                    rng.gen_range(9..=15)
                } else {
                    rng.gen_range(16..=25.min(all_node_ids.len()))
                }
            };

            // Randomly select nodes for this root set
            let mut selected_nodes = BTreeSet::new();
            while selected_nodes.len() < root_set_size {
                let idx = rng.gen_range(0..all_node_ids.len());
                selected_nodes.insert(all_node_ids[idx]);
            }

            root_sets.push(selected_nodes.into_iter().collect::<Vec<_>>());
        }

        // Convert root sets to ArenaHash sets
        let root_sets_as_keys: Vec<BTreeSet<_>> = root_sets.iter().map(to_keys).collect();

        // Compute initial_write_delete_costs for each root set.
        for i in 0..root_sets.len() {
            let results = super::initial_write_delete_costs::<DefaultDB>(
                &root_sets_as_keys[i].clone().into_iter().collect(),
                |_, _| Default::default(),
            );

            // Verify the RcMap matches what get_subgraph_rcs predicts
            let expected_rcs = get_subgraph_rcs(&root_sets[i]);
            let actual_rcs = results.updated_charged_keys.get_rcs();

            // Convert expected_rcs node IDs to ArenaHashs for comparison
            let expected_rcs_as_keys: BTreeMap<_, _> = expected_rcs
                .into_iter()
                .map(|(node_id, rc)| (dag.nodes[&node_id].clone(), rc))
                .collect();

            assert_eq!(
                actual_rcs, expected_rcs_as_keys,
                "Initial costs for root set {} should have correct reference counts",
                i
            );

            // Verify that all keys in the rootset are descendants of the RcMap
            let rcmap_descendants = get_rcmap_descendants(&results.updated_charged_keys);
            for root_key in &root_sets_as_keys[i] {
                assert!(
                    rcmap_descendants.contains(root_key),
                    "Root key {:?} should be a descendant of RcMap after initial_write_delete_costs",
                    root_key
                );
            }
        }

        // Initialize from the first root set, and then iterate over each
        // subsequent root set and compute incremental_write_delete_costs.
        let initial_roots = &root_sets_as_keys[0];
        let initial_results =
            super::initial_write_delete_costs(initial_roots, |_, _| Default::default());
        let mut current_charged_keys = initial_results.updated_charged_keys;
        for i in 1..root_sets.len() {
            let next_roots = &root_sets_as_keys[i];
            let results = super::incremental_write_delete_costs::<DefaultDB>(
                &current_charged_keys,
                next_roots,
                |_, _| Default::default(),
                |_| 1000, // High step limit for complete GC
            );

            // Verify the RcMap matches what get_subgraph_rcs predicts for the new root set
            let expected_rcs = get_subgraph_rcs(&root_sets[i]);
            let actual_rcs = results.updated_charged_keys.get_rcs();

            // Convert expected_rcs node IDs to ArenaHashs for comparison
            let expected_rcs_as_keys: BTreeMap<_, _> = expected_rcs
                .into_iter()
                .map(|(node_id, rc)| (dag.nodes[&node_id].clone(), rc))
                .collect();

            assert_eq!(
                actual_rcs, expected_rcs_as_keys,
                "Incremental transition {} should have correct reference counts",
                i
            );

            // Verify that all keys in the rootset are descendants of the RcMap
            let rcmap_descendants = get_rcmap_descendants(&results.updated_charged_keys);
            for root_key in next_roots {
                assert!(
                    rcmap_descendants.contains(root_key),
                    "Root key {:?} should be a descendant of RcMap after incremental_write_delete_costs",
                    root_key
                );
            }

            // Update for next iteration
            current_charged_keys = results.updated_charged_keys;
        }
    }
855}