Skip to main content

blvm_consensus/
utxo_overlay.rs

1//! Zero-Copy UTXO Overlay
2//!
3//! Provides a copy-on-write view over the UTXO set for block validation.
4//! Instead of cloning the entire UTXO set (O(n) where n = millions of UTXOs),
5//! we maintain a small overlay of changes.
6//!
7//! This is critical for performance at high block heights where the UTXO set
8//! contains ~75 million entries.
9//!
10//! ## Design
11//!
12//! ```text
13//! ┌─────────────────┐
14//! │  UtxoOverlay    │  ← Small (block-sized changes)
15//! │  - additions    │
16//! │  - deletions    │
17//! └────────┬────────┘
18//!          │ fallback
19//! ┌────────▼────────┐
20//! │  Base UtxoSet   │  ← Large (millions of UTXOs), read-only
21//! └─────────────────┘
22//! ```
23//!
24//! ## Performance
25//!
26//! - Clone: O(1) instead of O(n)
27//! - Lookup: O(1) with small constant overhead
28//! - Insert: O(1)
29//! - Memory: O(block_tx_count) instead of O(utxo_set_size)
30
31use crate::types::{OutPoint, UtxoSet, UTXO};
32#[cfg(feature = "production")]
33use rustc_hash::{FxHashMap, FxHashSet};
34#[cfg(not(feature = "production"))]
35use std::collections::{HashMap as FxHashMap, HashSet as FxHashSet};
36
37/// Fixed-size key for deletions set — avoids OutPoint clone in remove() (~3k inputs/block).
38/// Same encoding as `outpoint_to_key` below: 32-byte txid + 4-byte big-endian vout.
39pub type UtxoDeletionKey = [u8; 36];
40
41type OutPointKey = UtxoDeletionKey;
42
43#[inline]
44fn outpoint_to_key(op: &OutPoint) -> OutPointKey {
45    let mut key = [0u8; 36];
46    key[..32].copy_from_slice(&op.hash);
47    key[32..36].copy_from_slice(&op.index.to_be_bytes());
48    key
49}
50
51#[inline]
52pub fn utxo_deletion_key_to_outpoint(key: &UtxoDeletionKey) -> OutPoint {
53    let mut hash = [0u8; 32];
54    hash.copy_from_slice(&key[..32]);
55    let index = u32::from_be_bytes(key[32..36].try_into().unwrap());
56    OutPoint { hash, index }
57}
58
59#[inline]
60fn key_to_outpoint(key: &OutPointKey) -> OutPoint {
61    utxo_deletion_key_to_outpoint(key)
62}
63
64/// Trait for UTXO lookups - implemented by both UtxoSet and UtxoOverlay.
65///
66/// This allows check_tx_inputs and other validation functions to work
67/// with either type without code duplication.
68pub trait UtxoLookup {
69    /// Look up a UTXO by outpoint.
70    fn get(&self, outpoint: &OutPoint) -> Option<&UTXO>;
71
72    /// Check if a UTXO exists.
73    fn contains_key(&self, outpoint: &OutPoint) -> bool {
74        self.get(outpoint).is_some()
75    }
76
77    /// Get the number of UTXOs (approximate for overlays).
78    fn len(&self) -> usize;
79
80    /// Check if empty.
81    fn is_empty(&self) -> bool {
82        self.len() == 0
83    }
84}
85
86/// Implementation for standard UtxoSet (HashMap / FxHashMap).
87#[cfg(feature = "production")]
88impl UtxoLookup for UtxoSet {
89    #[inline]
90    fn get(&self, outpoint: &OutPoint) -> Option<&UTXO> {
91        FxHashMap::get(self, outpoint).map(|a| a.as_ref())
92    }
93    #[inline]
94    fn contains_key(&self, outpoint: &OutPoint) -> bool {
95        FxHashMap::contains_key(self, outpoint)
96    }
97    #[inline]
98    fn len(&self) -> usize {
99        FxHashMap::len(self)
100    }
101    #[inline]
102    fn is_empty(&self) -> bool {
103        FxHashMap::is_empty(self)
104    }
105}
106#[cfg(not(feature = "production"))]
107impl UtxoLookup for UtxoSet {
108    #[inline]
109    fn get(&self, outpoint: &OutPoint) -> Option<&UTXO> {
110        std::collections::HashMap::get(self, outpoint).map(|a| a.as_ref())
111    }
112    #[inline]
113    fn contains_key(&self, outpoint: &OutPoint) -> bool {
114        std::collections::HashMap::contains_key(self, outpoint)
115    }
116    #[inline]
117    fn len(&self) -> usize {
118        std::collections::HashMap::len(self)
119    }
120    #[inline]
121    fn is_empty(&self) -> bool {
122        std::collections::HashMap::is_empty(self)
123    }
124}
125
126/// Zero-copy overlay for UTXO set modifications during block validation.
127///
128/// Provides a view over the base UTXO set that captures additions and deletions
129/// without copying the underlying data.
130#[derive(Debug)]
131pub struct UtxoOverlay<'a> {
132    /// Reference to the base UTXO set (read-only)
133    base: &'a UtxoSet,
134    /// UTXOs added in this overlay (new outputs from current block)
135    additions: FxHashMap<OutPoint, std::sync::Arc<UTXO>>,
136    /// UTXOs marked as spent in this overlay (OutPointKey to avoid clone in remove hot path)
137    deletions: FxHashSet<OutPointKey>,
138    /// OPTIMIZATION #3: Cached flag to skip deletions.contains() check when empty
139    has_deletions: bool,
140}
141
142impl<'a> UtxoOverlay<'a> {
143    /// Create a new overlay over the given UTXO set.
144    ///
145    /// This is O(1) - no copying of the base set.
146    #[inline]
147    pub fn new(base: &'a UtxoSet) -> Self {
148        Self {
149            base,
150            // Pre-allocate for typical block size (~3000 transactions)
151            additions: FxHashMap::with_capacity_and_hasher(6000, Default::default()),
152            deletions: FxHashSet::with_capacity_and_hasher(6000, Default::default()),
153            has_deletions: false, // OPTIMIZATION #3: Start with no deletions
154        }
155    }
156
157    /// Create a new overlay with custom capacity hints.
158    #[inline]
159    pub fn with_capacity(base: &'a UtxoSet, additions_cap: usize, deletions_cap: usize) -> Self {
160        Self {
161            base,
162            additions: FxHashMap::with_capacity_and_hasher(additions_cap, Default::default()),
163            deletions: FxHashSet::with_capacity_and_hasher(deletions_cap, Default::default()),
164            has_deletions: false, // OPTIMIZATION #3: Start with no deletions
165        }
166    }
167
168    /// Look up a UTXO by outpoint.
169    ///
170    /// First checks deletions, then additions, then base set.
171    #[inline]
172    pub fn get(&self, outpoint: &OutPoint) -> Option<&UTXO> {
173        if self.has_deletions && self.deletions.contains(&outpoint_to_key(outpoint)) {
174            return None;
175        }
176
177        // Check additions first (for intra-block spending)
178        if let Some(arc) = self.additions.get(outpoint) {
179            return Some(arc.as_ref());
180        }
181
182        // Fall back to base set (UtxoSet holds Arc<UTXO>; deref to &UTXO)
183        self.base.get(outpoint).map(|a| a.as_ref())
184    }
185
186    /// Check if a UTXO exists.
187    #[inline]
188    pub fn contains_key(&self, outpoint: &OutPoint) -> bool {
189        if self.has_deletions && self.deletions.contains(&outpoint_to_key(outpoint)) {
190            return false;
191        }
192        self.additions.contains_key(outpoint) || self.base.contains_key(outpoint)
193    }
194
195    /// Add a new UTXO (created by a transaction in this block).
196    #[inline]
197    pub fn insert(&mut self, outpoint: OutPoint, utxo: UTXO) {
198        self.insert_arc(outpoint, std::sync::Arc::new(utxo));
199    }
200
201    /// Add a new UTXO from Arc. Use when sharing with undo log.
202    #[inline]
203    pub fn insert_arc(&mut self, outpoint: OutPoint, utxo: std::sync::Arc<UTXO>) {
204        if self.deletions.remove(&outpoint_to_key(&outpoint)) {
205            // Update flag if deletions set becomes empty
206            if self.deletions.is_empty() {
207                self.has_deletions = false;
208            }
209        }
210        self.additions.insert(outpoint, utxo);
211    }
212
213    /// Mark a UTXO as spent without returning it. Use when the return value is discarded (e.g. IBD path).
214    /// Avoids cloning the UTXO (~50 bytes per input).
215    #[inline]
216    pub fn mark_spent(&mut self, outpoint: &OutPoint) {
217        // Check additions first (intra-block spend)
218        if self.additions.remove(outpoint).is_some() {
219            return;
220        }
221        // Mark as deleted from base set (no clone — caller doesn't need the UTXO)
222        if self.base.contains_key(outpoint) {
223            self.deletions.insert(outpoint_to_key(outpoint));
224            self.has_deletions = true;
225        }
226    }
227
228    /// Mark a UTXO as spent (consumed by a transaction in this block).
229    /// Returns the UTXO for undo-log path. Use `mark_spent` when return value is discarded.
230    #[inline]
231    pub fn remove(&mut self, outpoint: &OutPoint) -> Option<std::sync::Arc<UTXO>> {
232        // Check additions first (intra-block spend)
233        if let Some(arc) = self.additions.remove(outpoint) {
234            return Some(arc);
235        }
236        // Mark as deleted from base set (base holds Arc<UTXO>; share Arc for undo)
237        if let Some(arc) = self.base.get(outpoint) {
238            self.deletions.insert(outpoint_to_key(outpoint));
239            self.has_deletions = true;
240            return Some(std::sync::Arc::clone(arc));
241        }
242
243        None
244    }
245
246    /// Get the number of additions in this overlay.
247    #[inline]
248    pub fn additions_len(&self) -> usize {
249        self.additions.len()
250    }
251
252    /// Get the number of deletions in this overlay.
253    #[inline]
254    pub fn deletions_len(&self) -> usize {
255        self.deletions.len()
256    }
257
258    /// Get the size of the base UTXO set.
259    #[inline]
260    pub fn base_len(&self) -> usize {
261        self.base.len()
262    }
263
264    /// Apply the overlay changes to produce a new UTXO set.
265    ///
266    /// This is called at the end of successful block validation.
267    /// Base clone is cheap (Arc refcount only); additions wrapped in Arc.
268    pub fn apply_to_base(self) -> UtxoSet {
269        let mut result = self.base.clone();
270        for key in self.deletions {
271            result.remove(&key_to_outpoint(&key));
272        }
273        for (outpoint, arc) in self.additions {
274            result.insert(outpoint, arc);
275        }
276        result
277    }
278
279    /// Get immutable access to additions (for undo log generation).
280    #[inline]
281    pub fn additions(&self) -> &FxHashMap<OutPoint, std::sync::Arc<UTXO>> {
282        &self.additions
283    }
284
285    /// Get immutable access to deletion keys (for undo log generation).
286    #[inline]
287    pub fn deletions(&self) -> &FxHashSet<OutPointKey> {
288        &self.deletions
289    }
290
291    /// Consume the overlay and return its changes as owned data.
292    ///
293    /// This releases the borrow on the base UTXO set, allowing the caller
294    /// to apply the changes directly to the mutable utxo_set without
295    /// re-iterating all transactions.
296    ///
297    /// Returns (additions, deletions) — the net UTXO changes from this block.
298    #[inline]
299    pub fn into_changes(
300        self,
301    ) -> (
302        FxHashMap<OutPoint, std::sync::Arc<UTXO>>,
303        FxHashSet<UtxoDeletionKey>,
304    ) {
305        (self.additions, self.deletions)
306    }
307}
308
309/// Implementation of UtxoLookup for UtxoOverlay.
310impl<'a> UtxoLookup for UtxoOverlay<'a> {
311    #[inline]
312    fn get(&self, outpoint: &OutPoint) -> Option<&UTXO> {
313        if self.has_deletions && self.deletions.contains(&outpoint_to_key(outpoint)) {
314            return None;
315        }
316        if let Some(arc) = self.additions.get(outpoint) {
317            return Some(arc.as_ref());
318        }
319        self.base.get(outpoint).map(|a| a.as_ref())
320    }
321
322    #[inline]
323    fn contains_key(&self, outpoint: &OutPoint) -> bool {
324        if self.has_deletions && self.deletions.contains(&outpoint_to_key(outpoint)) {
325            return false;
326        }
327        self.additions.contains_key(outpoint) || self.base.contains_key(outpoint)
328    }
329
330    #[inline]
331    fn len(&self) -> usize {
332        // Approximate: base size + additions - deletions
333        // Note: This may not be exact if additions shadow base entries
334        self.base.len() + self.additions.len() - self.deletions.len()
335    }
336}
337
338/// Fast UTXO set using FxHash for better performance.
339///
340/// FxHashMap uses a faster hash function than the default SipHash.
341/// For fixed-size keys like OutPoint (36 bytes), this is 2-3x faster.
342#[cfg(feature = "production")]
343pub type FastUtxoSet = rustc_hash::FxHashMap<OutPoint, UTXO>;
344
345#[cfg(not(feature = "production"))]
346pub type FastUtxoSet = std::collections::HashMap<OutPoint, UTXO>;
347
348/// Convert standard UtxoSet to FastUtxoSet (clones UTXOs).
349#[cfg(feature = "production")]
350#[inline]
351pub fn to_fast_utxo_set(utxo_set: &UtxoSet) -> FastUtxoSet {
352    utxo_set.iter().map(|(k, v)| (*k, (**v).clone())).collect()
353}
354
355#[cfg(not(feature = "production"))]
356#[inline]
357pub fn to_fast_utxo_set(utxo_set: &UtxoSet) -> FastUtxoSet {
358    utxo_set.iter().map(|(k, v)| (*k, (**v).clone())).collect()
359}
360
361/// Apply a transaction to the overlay (for validation phase).
362///
363/// This is the overlay-compatible version of `apply_transaction_with_id`.
364/// It modifies the overlay in place rather than returning a new UTXO set.
365///
366/// Returns undo entries for use in block undo log (optional in validation phase).
367#[inline]
368pub fn apply_transaction_to_overlay(
369    overlay: &mut UtxoOverlay<'_>,
370    tx: &crate::types::Transaction,
371    tx_id: crate::types::Hash,
372    height: crate::types::Natural,
373) -> Vec<crate::reorganization::UndoEntry> {
374    use crate::reorganization::UndoEntry;
375    use crate::transaction::is_coinbase;
376
377    let mut undo_entries = Vec::with_capacity(tx.inputs.len() + tx.outputs.len());
378
379    // Remove spent inputs (except for coinbase)
380    if !is_coinbase(tx) {
381        for input in &tx.inputs {
382            // Record the UTXO that existed before (for restoration during disconnect)
383            if let Some(previous_utxo) = overlay.remove(&input.prevout) {
384                undo_entries.push(UndoEntry {
385                    outpoint: input.prevout,
386                    previous_utxo: Some(previous_utxo),
387                    new_utxo: None, // This UTXO is being spent
388                });
389            }
390        }
391    }
392
393    // Add new outputs
394    for (i, output) in tx.outputs.iter().enumerate() {
395        let outpoint = OutPoint {
396            hash: tx_id,
397            index: i as u32,
398        };
399
400        let utxo = UTXO {
401            value: output.value,
402            script_pubkey: output.script_pubkey.as_slice().into(),
403            height,
404            is_coinbase: is_coinbase(tx),
405        };
406
407        let utxo_arc = std::sync::Arc::new(utxo);
408        // Record new UTXO for undo log
409        undo_entries.push(UndoEntry {
410            outpoint,
411            previous_utxo: None, // Newly created
412            new_utxo: Some(std::sync::Arc::clone(&utxo_arc)),
413        });
414
415        overlay.insert_arc(outpoint, utxo_arc);
416    }
417
418    undo_entries
419}
420
421/// Apply a transaction to the overlay WITHOUT building undo entries.
422///
423/// This is the fast path for IBD where undo logs are discarded.
424/// Avoids cloning outpoints and UTXOs for undo entries, saving
425/// significant allocation overhead (2 clones per input, 2 per output).
426#[inline]
427pub fn apply_transaction_to_overlay_no_undo(
428    overlay: &mut UtxoOverlay<'_>,
429    tx: &crate::types::Transaction,
430    tx_id: crate::types::Hash,
431    height: crate::types::Natural,
432) {
433    let is_cb = crate::transaction::is_coinbase(tx);
434
435    if !is_cb {
436        for input in &tx.inputs {
437            overlay.mark_spent(&input.prevout);
438        }
439    }
440    for (i, output) in tx.outputs.iter().enumerate() {
441        let outpoint = OutPoint {
442            hash: tx_id,
443            index: i as u32,
444        };
445
446        let utxo = UTXO {
447            value: output.value,
448            script_pubkey: output.script_pubkey.as_slice().into(),
449            height,
450            is_coinbase: is_cb,
451        };
452
453        overlay.insert(outpoint, utxo);
454    }
455}
456
457#[cfg(test)]
458mod tests {
459    use super::*;
460    use crate::utxo_set_insert;
461
462    fn make_outpoint(idx: u8) -> OutPoint {
463        OutPoint {
464            hash: [idx; 32],
465            index: idx as u32,
466        }
467    }
468
469    fn make_utxo(value: i64) -> UTXO {
470        UTXO {
471            value,
472            script_pubkey: vec![crate::opcodes::OP_DUP, crate::opcodes::OP_HASH160].into(), // P2PKH prefix
473            height: 1,
474            is_coinbase: false,
475        }
476    }
477
478    #[test]
479    fn test_overlay_lookup_from_base() {
480        let mut base = UtxoSet::default();
481        utxo_set_insert(&mut base, make_outpoint(1), make_utxo(1000));
482        utxo_set_insert(&mut base, make_outpoint(2), make_utxo(2000));
483
484        let overlay = UtxoOverlay::new(&base);
485
486        assert_eq!(overlay.get(&make_outpoint(1)).unwrap().value, 1000);
487        assert_eq!(overlay.get(&make_outpoint(2)).unwrap().value, 2000);
488        assert!(overlay.get(&make_outpoint(3)).is_none());
489    }
490
491    #[test]
492    fn test_overlay_additions() {
493        let base = UtxoSet::default();
494        let mut overlay = UtxoOverlay::new(&base);
495
496        overlay.insert(make_outpoint(1), make_utxo(1000));
497
498        assert_eq!(overlay.get(&make_outpoint(1)).unwrap().value, 1000);
499        assert_eq!(overlay.additions_len(), 1);
500    }
501
502    #[test]
503    fn test_overlay_deletions() {
504        let mut base = UtxoSet::default();
505        utxo_set_insert(&mut base, make_outpoint(1), make_utxo(1000));
506
507        let mut overlay = UtxoOverlay::new(&base);
508
509        let removed = overlay.remove(&make_outpoint(1));
510        assert_eq!(removed.unwrap().value, 1000);
511        assert!(overlay.get(&make_outpoint(1)).is_none());
512        assert_eq!(overlay.deletions_len(), 1);
513    }
514
515    #[test]
516    fn test_overlay_intra_block_spend() {
517        // Simulate spending an output created in the same block
518        let base = UtxoSet::default();
519        let mut overlay = UtxoOverlay::new(&base);
520
521        // Add output
522        overlay.insert(make_outpoint(1), make_utxo(1000));
523        assert!(overlay.get(&make_outpoint(1)).is_some());
524
525        // Spend it in same block
526        let removed = overlay.remove(&make_outpoint(1));
527        assert_eq!(removed.unwrap().value, 1000);
528        assert!(overlay.get(&make_outpoint(1)).is_none());
529
530        // Should not be in deletions (was only in additions)
531        assert_eq!(overlay.deletions_len(), 0);
532        assert_eq!(overlay.additions_len(), 0);
533    }
534
535    #[test]
536    fn test_overlay_apply() {
537        let mut base = UtxoSet::default();
538        utxo_set_insert(&mut base, make_outpoint(1), make_utxo(1000));
539        utxo_set_insert(&mut base, make_outpoint(2), make_utxo(2000));
540
541        let mut overlay = UtxoOverlay::new(&base);
542
543        // Remove one, add one
544        overlay.remove(&make_outpoint(1));
545        overlay.insert(make_outpoint(3), make_utxo(3000));
546
547        let result = overlay.apply_to_base();
548
549        assert!(result.get(&make_outpoint(1)).is_none()); // Removed
550        assert_eq!(result.get(&make_outpoint(2)).unwrap().value, 2000); // Unchanged
551        assert_eq!(result.get(&make_outpoint(3)).unwrap().value, 3000); // Added
552    }
553
554    #[test]
555    fn test_overlay_no_clone_on_creation() {
556        // This test verifies the design - overlay creation is O(1)
557        let mut base = UtxoSet::default();
558        for i in 0..10000 {
559            utxo_set_insert(&mut base, make_outpoint(i as u8), make_utxo(i as i64));
560        }
561
562        // Creating overlay should be instant (no clone)
563        let start = std::time::Instant::now();
564        let _overlay = UtxoOverlay::new(&base);
565        let elapsed = start.elapsed();
566
567        // Should be sub-microsecond (just pointer + empty hashmap allocation)
568        assert!(
569            elapsed.as_micros() < 100,
570            "Overlay creation took {:?}",
571            elapsed
572        );
573    }
574}