Skip to main content

blvm_protocol/utxo_commitments/
initial_sync.rs

1//! Initial Sync Algorithm
2//!
3//! Implements the peer consensus initial sync algorithm:
4//! 1. Discover diverse peers
5//! 2. Determine checkpoint height
6//! 3. Request UTXO sets from peers
7//! 4. Find consensus
8//! 5. Verify against block headers
9//! 6. Download UTXO set
10
11use crate::spam_filter::{SpamBreakdown, SpamFilter, SpamFilterConfig, SpamSummary, SpamType};
12#[cfg(feature = "utxo-commitments")]
13use crate::utxo_commitments::data_structures::{
14    UtxoCommitment, UtxoCommitmentError, UtxoCommitmentResult,
15};
16#[cfg(feature = "utxo-commitments")]
17use crate::utxo_commitments::merkle_tree::UtxoMerkleTree;
18#[cfg(feature = "utxo-commitments")]
19use crate::utxo_commitments::network_integration::UtxoCommitmentsNetworkClient;
20#[cfg(feature = "utxo-commitments")]
21use crate::utxo_commitments::peer_consensus::{ConsensusConfig, PeerConsensus, PeerInfo};
22#[cfg(feature = "utxo-commitments")]
23use blvm_consensus::types::{
24    BlockHeader, Hash as HashType, Natural, OutPoint, Transaction, UtxoSet, UTXO,
25};
26#[cfg(feature = "utxo-commitments")]
27/// Initial sync manager
28pub struct InitialSync {
29    peer_consensus: PeerConsensus,
30    spam_filter: SpamFilter,
31    // In real implementation: network_client: NetworkClient,
32}
33
34impl InitialSync {
35    /// Create a new initial sync manager
36    pub fn new(config: ConsensusConfig) -> Self {
37        Self {
38            peer_consensus: PeerConsensus::new(config),
39            spam_filter: SpamFilter::new(),
40        }
41    }
42
43    /// Create a new initial sync manager with custom spam filter config
44    pub fn with_spam_filter(config: ConsensusConfig, spam_filter_config: SpamFilterConfig) -> Self {
45        Self {
46            peer_consensus: PeerConsensus::new(config),
47            spam_filter: SpamFilter::with_config(spam_filter_config),
48        }
49    }
50
51    /// Execute initial sync algorithm
52    ///
53    /// Performs the complete initial sync process:
54    /// 1. Discover diverse peers
55    /// 2. Determine checkpoint height
56    /// 3. Request UTXO sets
57    /// 4. Find consensus
58    /// 5. Verify against headers
59    /// 6. Return verified UTXO commitment
60    pub async fn execute_initial_sync<C: UtxoCommitmentsNetworkClient>(
61        &self,
62        peers: &[(PeerInfo, String)],
63        header_chain: &[BlockHeader],
64        network_client: &C,
65    ) -> UtxoCommitmentResult<UtxoCommitment> {
66        // Step 1: Discover diverse peers (based on consensus-level PeerInfo)
67        let all_infos: Vec<PeerInfo> = peers.iter().map(|(info, _)| info.clone()).collect();
68        let diverse_infos = self.peer_consensus.discover_diverse_peers(all_infos);
69
70        if diverse_infos.len() < self.peer_consensus.config.min_peers {
71            return Err(UtxoCommitmentError::VerificationFailed(format!(
72                "Insufficient diverse peers: got {}, need {}",
73                diverse_infos.len(),
74                self.peer_consensus.config.min_peers
75            )));
76        }
77
78        // Re-attach peer IDs to the diverse set
79        let diverse_with_ids: Vec<(PeerInfo, String)> = peers
80            .iter()
81            .filter(|(info, _)| {
82                diverse_infos
83                    .iter()
84                    .any(|d| d.address == info.address && d.subnet == info.subnet)
85            })
86            .cloned()
87            .collect();
88
89        // Step 2: Determine checkpoint height
90        // Note: Peer tip queries would require additional network protocol support.
91        // For now, we use the header chain fallback which is sufficient for initial sync.
92        let peer_tips: Vec<Natural> = vec![];
93        let checkpoint_height = if !peer_tips.is_empty() {
94            self.peer_consensus.determine_checkpoint_height(peer_tips)
95        } else if !header_chain.is_empty() {
96            let tip = header_chain.len() as Natural - 1;
97            if tip > self.peer_consensus.config.safety_margin {
98                tip - self.peer_consensus.config.safety_margin
99            } else {
100                0
101            }
102        } else {
103            return Err(UtxoCommitmentError::VerificationFailed(
104                "No header chain or peer tips available".to_string(),
105            ));
106        };
107
108        // Get checkpoint block hash from header chain (unchanged)
109        if checkpoint_height as usize >= header_chain.len() {
110            return Err(UtxoCommitmentError::VerificationFailed(format!(
111                "Checkpoint height {} exceeds header chain length {}",
112                checkpoint_height,
113                header_chain.len()
114            )));
115        }
116
117        let checkpoint_header = &header_chain[checkpoint_height as usize];
118        let checkpoint_hash = compute_block_hash(checkpoint_header);
119
120        // Step 3: Request UTXO sets from diverse peers via the network client
121        let peer_commitments = self
122            .peer_consensus
123            .request_utxo_sets(
124                network_client,
125                &diverse_with_ids,
126                checkpoint_height,
127                checkpoint_hash,
128            )
129            .await;
130
131        // Step 4: Find consensus
132        let consensus = self.peer_consensus.find_consensus(peer_commitments)?;
133
134        // Step 5: Verify consensus commitment against block headers
135        self.peer_consensus
136            .verify_consensus_commitment(&consensus, header_chain)?;
137
138        // Step 6: Optionally verify UTXO proofs for critical UTXOs
139        // This prevents coin freezing attacks where malicious peers provide
140        // commitments with correct total supply but missing/modified UTXOs.
141        // Note: This requires network protocol support for proof requests.
142        // For now, this is optional and can be enabled when needed.
143        #[cfg(feature = "utxo-proof-verification")]
144        {
145            // Verify proofs for wallet UTXOs or random sampling
146            // Implementation depends on having access to wallet UTXOs
147            // or implementing random sampling strategy
148        }
149
150        // Step 7: Return verified commitment
151        Ok(consensus.commitment)
152    }
153
154    /// Complete sync from checkpoint to current tip
155    ///
156    /// Syncs forward from checkpoint using FULL blocks with complete validation.
157    /// Fully validates all transactions (signatures, scripts) before updating UTXO set.
158    ///
159    /// # Arguments
160    ///
161    /// * `utxo_tree` - UTXO Merkle tree to update incrementally
162    /// * `checkpoint_height` - Starting height (checkpoint)
163    /// * `current_tip` - Ending height (current chain tip)
164    /// * `network_client` - Network client for requesting blocks
165    /// * `get_block_hash` - Function to get block hash for a given height
166    /// * `peer_id` - Peer ID to request blocks from
167    /// * `network` - Network type (Mainnet, Testnet, Regtest)
168    /// * `network_time` - Current network time (Unix timestamp)
169    /// * `recent_headers` - Recent block headers for median time-past calculation
170    /// * `checkpoint_utxo_set` - Full UTXO set at checkpoint (required for validation)
171    ///                          If None, starts with empty set (cannot verify checkpoint commitment until end)
172    ///
173    /// # Implementation
174    ///
175    /// 1. Requests FULL blocks from checkpoint+1 to tip
176    /// 2. For each full block:
177    ///    - Fully validates block with connect_block() (signatures, scripts, all consensus rules)
178    ///    - Updates UTXO set from validated result
179    ///    - Updates UTXO tree from validated UTXO set
180    ///    - Verifies commitment matches computed root
181    /// 3. Updates UTXO tree incrementally after validation
182    ///
183    /// **Security**: All transactions are cryptographically verified before UTXO set update.
184    pub async fn complete_sync_from_checkpoint<C, F, Fut>(
185        &self,
186        utxo_tree: &mut UtxoMerkleTree,
187        checkpoint_height: Natural,
188        current_tip: Natural,
189        network_client: &C,
190        get_block_hash: F,
191        peer_id: &str,
192        network: crate::types::Network,
193        network_time: u64,
194        recent_headers: Option<&[BlockHeader]>,
195        checkpoint_utxo_set: Option<UtxoSet>,
196    ) -> UtxoCommitmentResult<()>
197    where
198        C: UtxoCommitmentsNetworkClient,
199        F: Fn(Natural) -> Fut,
200        Fut: std::future::Future<Output = UtxoCommitmentResult<HashType>>,
201    {
202        use crate::block::connect_block;
203
204        // Start with checkpoint UTXO set, or empty if not provided
205        // Note: If empty, we cannot verify checkpoint commitment until we've built the full set
206        let mut utxo_set: UtxoSet = checkpoint_utxo_set.unwrap_or_default();
207
208        // Process blocks incrementally from checkpoint+1 to current tip
209        for height in checkpoint_height + 1..=current_tip {
210            // Get block hash for this height
211            let block_hash = get_block_hash(height).await?;
212
213            // Request FULL block (not filtered) from network peer
214            // This includes all transactions with witnesses for complete validation
215            let full_block = network_client
216                .request_full_block(peer_id, block_hash)
217                .await?;
218
219            // Verify block header height matches expected height
220            if full_block.block.header.timestamp == 0 {
221                return Err(UtxoCommitmentError::VerificationFailed(format!(
222                    "Invalid block header at height {}",
223                    height
224                )));
225            }
226
227            let context = crate::block::block_validation_context_for_connect_ibd(
228                recent_headers,
229                network_time,
230                network,
231            );
232            let (validation_result, new_utxo_set, _undo_log) = connect_block(
233                &full_block.block,
234                &full_block.witnesses,
235                utxo_set.clone(),
236                height,
237                &context,
238            )
239            .map_err(|e| {
240                UtxoCommitmentError::VerificationFailed(format!(
241                    "connect_block failed at height {}: {}",
242                    height, e
243                ))
244            })?;
245
246            // Reject if validation failed
247            if !matches!(
248                validation_result,
249                blvm_consensus::types::ValidationResult::Valid
250            ) {
251                return Err(UtxoCommitmentError::VerificationFailed(format!(
252                    "Block validation failed at height {}: {:?}",
253                    height, validation_result
254                )));
255            }
256
257            // Update UTXO tree from validated UTXO set
258            // This ensures the tree matches the cryptographically verified state
259            // Pass old utxo_set to detect removals efficiently
260            let old_utxo_set = utxo_set.clone();
261            utxo_tree.update_from_utxo_set(&new_utxo_set, &old_utxo_set)?;
262
263            // Generate commitment from validated UTXO tree
264            let computed_block_hash = compute_block_hash(&full_block.block.header);
265            let computed_commitment = utxo_tree.generate_commitment(computed_block_hash, height);
266
267            // Verify commitment supply matches expected
268            use blvm_consensus::economic::total_supply;
269            let expected_supply = total_supply(height) as u64;
270            if computed_commitment.total_supply != expected_supply {
271                return Err(UtxoCommitmentError::VerificationFailed(format!(
272                    "Supply mismatch at height {}: computed {}, expected {}",
273                    height, computed_commitment.total_supply, expected_supply
274                )));
275            }
276
277            // Note: Commitment root, height, and block hash are verified implicitly
278            // by the fact that we computed them from the validated UTXO set
279
280            // Update utxo_set for next iteration
281            utxo_set = new_utxo_set;
282        }
283
284        Ok(())
285    }
286
287    /// Process a filtered block and update UTXO set
288    ///
289    /// Takes a block with transactions (already filtered or to be filtered),
290    /// applies spam filter, updates UTXO set, and verifies commitment.
291    ///
292    /// **Critical**: This function processes ALL transactions to remove spent inputs,
293    /// but only adds non-spam outputs to the UTXO tree. This ensures UTXO set consistency:
294    /// - Spam transactions that spend non-spam inputs will still remove those inputs
295    /// - Only non-spam outputs are added to the tree (bandwidth savings)
296    /// - UTXO set remains consistent with actual blockchain state
297    ///
298    /// Note: This function applies transactions to the UTXO tree for commitment
299    /// purposes. Full signature verification should be done during block validation
300    /// before calling this function. This function assumes transactions are already
301    /// validated.
302    pub fn process_filtered_block(
303        &self,
304        utxo_tree: &mut UtxoMerkleTree,
305        block_height: Natural,
306        block_transactions: &[Transaction],
307    ) -> UtxoCommitmentResult<(SpamSummary, HashType)> {
308        use blvm_consensus::transaction::is_coinbase;
309
310        let mut spam_summary = SpamSummary {
311            filtered_count: 0,
312            filtered_size: 0,
313            by_type: SpamBreakdown::default(),
314        };
315
316        // Process ALL transactions (including spam) to remove spent inputs
317        // This is critical for UTXO set consistency: even spam transactions must
318        // remove their spent inputs from the tree.
319        for tx in block_transactions {
320            // Check if transaction is spam (for output filtering)
321            let spam_result = self.spam_filter.is_spam(tx);
322            let is_spam = spam_result.is_spam;
323
324            // Update spam summary
325            if is_spam {
326                spam_summary.filtered_count += 1;
327                // Estimate transaction size (simplified calculation)
328                let tx_size = 4 + 1 + 1 + 4 + // version + input_count + output_count + locktime
329                    (tx.inputs.len() as u64 * 150) + // inputs
330                    tx.outputs.iter().map(|out| 8 + out.script_pubkey.len() as u64).sum::<u64>(); // outputs
331                spam_summary.filtered_size += tx_size;
332
333                // Update breakdown
334                for spam_type in &spam_result.detected_types {
335                    match spam_type {
336                        SpamType::Ordinals => {
337                            spam_summary.by_type.ordinals += 1;
338                        }
339                        SpamType::Dust => {
340                            spam_summary.by_type.dust += 1;
341                        }
342                        SpamType::BRC20 => {
343                            spam_summary.by_type.brc20 += 1;
344                        }
345                        SpamType::LargeWitness => {
346                            spam_summary.by_type.ordinals += 1; // Count as Ordinals
347                        }
348                        SpamType::LowFeeRate => {
349                            spam_summary.by_type.dust += 1; // Count as suspicious
350                        }
351                        SpamType::HighSizeValueRatio => {
352                            spam_summary.by_type.ordinals += 1; // Count as Ordinals
353                        }
354                        SpamType::ManySmallOutputs => {
355                            spam_summary.by_type.dust += 1; // Count as dust-like
356                        }
357                        SpamType::NotSpam => {}
358                    }
359                }
360            }
361
362            // Compute transaction ID for proper outpoint creation
363            let tx_id = compute_tx_id(tx);
364
365            // CRITICAL: Remove spent inputs from ALL transactions (including spam)
366            // This ensures UTXO set consistency even when spam transactions spend non-spam inputs
367            if !is_coinbase(tx) {
368                for input in &tx.inputs {
369                    // Get the UTXO first (needed for remove to update tracking)
370                    match utxo_tree.get(&input.prevout) {
371                        Ok(Some(utxo)) => {
372                            // Remove the UTXO (even if transaction is spam)
373                            if let Err(e) = utxo_tree.remove(&input.prevout, &utxo) {
374                                return Err(UtxoCommitmentError::TransactionApplication(format!(
375                                    "Failed to remove spent input: {:?}",
376                                    e
377                                )));
378                            }
379                        }
380                        Ok(None) => {
381                            // UTXO doesn't exist - this might be valid if it was already spent
382                            // or invalid if the transaction wasn't properly validated
383                            // Continue but log - this should be validated before calling
384                        }
385                        Err(e) => {
386                            return Err(UtxoCommitmentError::TransactionApplication(format!(
387                                "Failed to get UTXO for removal: {:?}",
388                                e
389                            )));
390                        }
391                    }
392                }
393            }
394
395            // Only add outputs from non-spam transactions
396            // This provides bandwidth savings while maintaining UTXO set consistency
397            if !is_spam {
398                for (i, output) in tx.outputs.iter().enumerate() {
399                    let outpoint = OutPoint {
400                        hash: tx_id,
401                        index: i as u32,
402                    };
403
404                    let utxo = UTXO {
405                        value: output.value,
406                        script_pubkey: output.script_pubkey.as_slice().into(),
407                        height: block_height,
408                        is_coinbase: is_coinbase(tx),
409                    };
410
411                    if let Err(e) = utxo_tree.insert(outpoint, utxo) {
412                        return Err(UtxoCommitmentError::TransactionApplication(format!(
413                            "Failed to add output: {:?}",
414                            e
415                        )));
416                    }
417                }
418            }
419            // Spam transaction outputs are skipped (not added to tree)
420        }
421
422        // Return summary and new root
423        let root = utxo_tree.root();
424
425        Ok((spam_summary, root))
426    }
427}
428
429/// Update UTXO commitments after block connection
430///
431/// This function should be called after successfully connecting a block
432/// to keep UTXO commitments synchronized with the blockchain state.
433///
434/// # Arguments
435///
436/// * `utxo_tree` - Mutable reference to the UTXO Merkle tree
437/// * `block` - The block that was just connected
438/// * `block_height` - Height of the connected block
439/// * `spam_filter` - Optional spam filter (if None, all transactions are included)
440///
441/// # Returns
442///
443/// Returns the new Merkle root hash of the UTXO tree.
444///
445/// # Example
446///
447/// ```rust
448/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
449/// use blvm_consensus::block::connect_block;
450/// use blvm_protocol::utxo_commitments::{UtxoMerkleTree, update_commitments_after_block};
451/// use blvm_consensus::spam_filter::SpamFilter;
452/// use blvm_consensus::types::{Network, ValidationResult};
453///
454/// # let block = blvm_consensus::types::Block {
455/// #     header: blvm_consensus::types::BlockHeader {
456/// #         version: 1, prev_block_hash: [0; 32], merkle_root: [0; 32],
457/// #         timestamp: 1234567890, bits: 0x1d00ffff, nonce: 0,
458/// #     },
459/// #     transactions: vec![].into(),
460/// # };
461/// # let witnesses = vec![];
462/// # let utxo_set = blvm_consensus::types::UtxoSet::new();
463/// # let height = 0;
464/// # let mut utxo_tree = UtxoMerkleTree::new()?;
465/// let (result, new_utxo_set, _) = connect_block(&block, &witnesses, utxo_set, height, None, Network::Regtest)?;
466/// if matches!(result, ValidationResult::Valid) {
467///     let spam_filter = SpamFilter::new();
468///     let root = update_commitments_after_block(
469///         &mut utxo_tree,
470///         &block,
471///         height,
472///         Some(&spam_filter),
473///     )?;
474///     println!("New UTXO commitment root: {:?}", root);
475/// }
476/// # Ok(())
477/// # }
478/// ```
479#[cfg(feature = "utxo-commitments")]
480pub fn update_commitments_after_block(
481    utxo_tree: &mut UtxoMerkleTree,
482    block: &crate::types::Block,
483    block_height: Natural,
484    spam_filter: Option<&SpamFilter>,
485) -> UtxoCommitmentResult<HashType> {
486    use blvm_consensus::block::calculate_tx_id;
487    use blvm_consensus::transaction::is_coinbase;
488
489    // If spam filter is provided, use filtered processing
490    if let Some(filter) = spam_filter {
491        let initial_sync = InitialSync {
492            peer_consensus: crate::utxo_commitments::peer_consensus::PeerConsensus::new(
493                crate::utxo_commitments::peer_consensus::ConsensusConfig::default(),
494            ),
495            spam_filter: filter.clone(),
496        };
497        let (_, root) =
498            initial_sync.process_filtered_block(utxo_tree, block_height, &block.transactions)?;
499        Ok(root)
500    } else {
501        // No spam filter: process all transactions normally
502        for tx in &block.transactions {
503            let tx_id = calculate_tx_id(tx);
504
505            // Remove spent inputs (except coinbase)
506            if !is_coinbase(tx) {
507                for input in &tx.inputs {
508                    // Get the UTXO first (needed for remove)
509                    match utxo_tree.get(&input.prevout) {
510                        Ok(Some(utxo)) => {
511                            utxo_tree.remove(&input.prevout, &utxo)?;
512                        }
513                        Ok(None) => {
514                            // UTXO doesn't exist - might be invalid or already spent
515                            // Continue but this should have been caught during validation
516                        }
517                        Err(e) => {
518                            return Err(UtxoCommitmentError::TransactionApplication(format!(
519                                "Failed to get UTXO for removal: {:?}",
520                                e
521                            )));
522                        }
523                    }
524                }
525            }
526
527            // Add new outputs
528            for (i, output) in tx.outputs.iter().enumerate() {
529                let outpoint = blvm_consensus::types::OutPoint {
530                    hash: tx_id,
531                    index: i as u32,
532                };
533
534                let utxo = blvm_consensus::types::UTXO {
535                    value: output.value,
536                    script_pubkey: output.script_pubkey.as_slice().into(),
537                    height: block_height,
538                    is_coinbase: is_coinbase(tx),
539                };
540
541                utxo_tree.insert(outpoint, utxo)?;
542            }
543        }
544
545        Ok(utxo_tree.root())
546    }
547}
548
549/// Compute transaction ID (txid) using Bitcoin's standard double SHA256
550///
551/// Transaction ID is computed as: SHA256(SHA256(serialized_tx))
552/// where serialized_tx is the transaction in Bitcoin wire format (non-SegWit format).
553///
554/// Note: For SegWit transactions, the txid still uses the non-witness serialization
555/// (witness data is excluded from txid calculation).
556///
557/// This matches consensus transaction ID computation exactly.
558fn compute_tx_id(tx: &Transaction) -> HashType {
559    use crate::serialization::transaction::serialize_transaction;
560    use sha2::{Digest, Sha256};
561
562    // Serialize transaction to Bitcoin wire format (non-SegWit format for txid)
563    let serialized = serialize_transaction(tx);
564
565    // Double SHA256 (Bitcoin standard for transaction IDs)
566    let first_hash = Sha256::digest(&serialized);
567    let second_hash = Sha256::digest(first_hash);
568
569    // Convert to HashType [u8; 32]
570    let mut txid = [0u8; 32];
571    txid.copy_from_slice(&second_hash);
572
573    txid
574}
575
576/// Compute block header hash (double SHA256)
577fn compute_block_hash(header: &BlockHeader) -> HashType {
578    use sha2::{Digest, Sha256};
579
580    let mut bytes = Vec::with_capacity(80);
581    bytes.extend_from_slice(&header.version.to_le_bytes());
582    bytes.extend_from_slice(&header.prev_block_hash);
583    bytes.extend_from_slice(&header.merkle_root);
584    bytes.extend_from_slice(&header.timestamp.to_le_bytes());
585    bytes.extend_from_slice(&header.bits.to_le_bytes());
586    bytes.extend_from_slice(&header.nonce.to_le_bytes());
587
588    let first_hash = Sha256::digest(&bytes);
589    let second_hash = Sha256::digest(&first_hash);
590
591    let mut hash = [0u8; 32];
592    hash.copy_from_slice(&second_hash);
593    hash
594}