blvm_protocol/utxo_commitments/initial_sync.rs
1//! Initial Sync Algorithm
2//!
3//! Implements the peer consensus initial sync algorithm:
4//! 1. Discover diverse peers
5//! 2. Determine checkpoint height
6//! 3. Request UTXO sets from peers
7//! 4. Find consensus
8//! 5. Verify against block headers
9//! 6. Download UTXO set
10
11use crate::spam_filter::{SpamBreakdown, SpamFilter, SpamFilterConfig, SpamSummary, SpamType};
12#[cfg(feature = "utxo-commitments")]
13use crate::utxo_commitments::data_structures::{
14 UtxoCommitment, UtxoCommitmentError, UtxoCommitmentResult,
15};
16#[cfg(feature = "utxo-commitments")]
17use crate::utxo_commitments::merkle_tree::UtxoMerkleTree;
18#[cfg(feature = "utxo-commitments")]
19use crate::utxo_commitments::network_integration::UtxoCommitmentsNetworkClient;
20#[cfg(feature = "utxo-commitments")]
21use crate::utxo_commitments::peer_consensus::{ConsensusConfig, PeerConsensus, PeerInfo};
22#[cfg(feature = "utxo-commitments")]
23use blvm_consensus::types::{
24 BlockHeader, Hash as HashType, Natural, OutPoint, Transaction, UtxoSet, UTXO,
25};
26#[cfg(feature = "utxo-commitments")]
27/// Initial sync manager
28pub struct InitialSync {
29 peer_consensus: PeerConsensus,
30 spam_filter: SpamFilter,
31 // In real implementation: network_client: NetworkClient,
32}
33
34impl InitialSync {
35 /// Create a new initial sync manager
36 pub fn new(config: ConsensusConfig) -> Self {
37 Self {
38 peer_consensus: PeerConsensus::new(config),
39 spam_filter: SpamFilter::new(),
40 }
41 }
42
43 /// Create a new initial sync manager with custom spam filter config
44 pub fn with_spam_filter(config: ConsensusConfig, spam_filter_config: SpamFilterConfig) -> Self {
45 Self {
46 peer_consensus: PeerConsensus::new(config),
47 spam_filter: SpamFilter::with_config(spam_filter_config),
48 }
49 }
50
51 /// Execute initial sync algorithm
52 ///
53 /// Performs the complete initial sync process:
54 /// 1. Discover diverse peers
55 /// 2. Determine checkpoint height
56 /// 3. Request UTXO sets
57 /// 4. Find consensus
58 /// 5. Verify against headers
59 /// 6. Return verified UTXO commitment
60 pub async fn execute_initial_sync<C: UtxoCommitmentsNetworkClient>(
61 &self,
62 peers: &[(PeerInfo, String)],
63 header_chain: &[BlockHeader],
64 network_client: &C,
65 ) -> UtxoCommitmentResult<UtxoCommitment> {
66 // Step 1: Discover diverse peers (based on consensus-level PeerInfo)
67 let all_infos: Vec<PeerInfo> = peers.iter().map(|(info, _)| info.clone()).collect();
68 let diverse_infos = self.peer_consensus.discover_diverse_peers(all_infos);
69
70 if diverse_infos.len() < self.peer_consensus.config.min_peers {
71 return Err(UtxoCommitmentError::VerificationFailed(format!(
72 "Insufficient diverse peers: got {}, need {}",
73 diverse_infos.len(),
74 self.peer_consensus.config.min_peers
75 )));
76 }
77
78 // Re-attach peer IDs to the diverse set
79 let diverse_with_ids: Vec<(PeerInfo, String)> = peers
80 .iter()
81 .filter(|(info, _)| {
82 diverse_infos
83 .iter()
84 .any(|d| d.address == info.address && d.subnet == info.subnet)
85 })
86 .cloned()
87 .collect();
88
89 // Step 2: Determine checkpoint height
90 // Note: Peer tip queries would require additional network protocol support.
91 // For now, we use the header chain fallback which is sufficient for initial sync.
92 let peer_tips: Vec<Natural> = vec![];
93 let checkpoint_height = if !peer_tips.is_empty() {
94 self.peer_consensus.determine_checkpoint_height(peer_tips)
95 } else if !header_chain.is_empty() {
96 let tip = header_chain.len() as Natural - 1;
97 if tip > self.peer_consensus.config.safety_margin {
98 tip - self.peer_consensus.config.safety_margin
99 } else {
100 0
101 }
102 } else {
103 return Err(UtxoCommitmentError::VerificationFailed(
104 "No header chain or peer tips available".to_string(),
105 ));
106 };
107
108 // Get checkpoint block hash from header chain (unchanged)
109 if checkpoint_height as usize >= header_chain.len() {
110 return Err(UtxoCommitmentError::VerificationFailed(format!(
111 "Checkpoint height {} exceeds header chain length {}",
112 checkpoint_height,
113 header_chain.len()
114 )));
115 }
116
117 let checkpoint_header = &header_chain[checkpoint_height as usize];
118 let checkpoint_hash = compute_block_hash(checkpoint_header);
119
120 // Step 3: Request UTXO sets from diverse peers via the network client
121 let peer_commitments = self
122 .peer_consensus
123 .request_utxo_sets(
124 network_client,
125 &diverse_with_ids,
126 checkpoint_height,
127 checkpoint_hash,
128 )
129 .await;
130
131 // Step 4: Find consensus
132 let consensus = self.peer_consensus.find_consensus(peer_commitments)?;
133
134 // Step 5: Verify consensus commitment against block headers
135 self.peer_consensus
136 .verify_consensus_commitment(&consensus, header_chain)?;
137
138 // Step 6: Optionally verify UTXO proofs for critical UTXOs
139 // This prevents coin freezing attacks where malicious peers provide
140 // commitments with correct total supply but missing/modified UTXOs.
141 // Note: This requires network protocol support for proof requests.
142 // For now, this is optional and can be enabled when needed.
143 #[cfg(feature = "utxo-proof-verification")]
144 {
145 // Verify proofs for wallet UTXOs or random sampling
146 // Implementation depends on having access to wallet UTXOs
147 // or implementing random sampling strategy
148 }
149
150 // Step 7: Return verified commitment
151 Ok(consensus.commitment)
152 }
153
154 /// Complete sync from checkpoint to current tip
155 ///
156 /// Syncs forward from checkpoint using FULL blocks with complete validation.
157 /// Fully validates all transactions (signatures, scripts) before updating UTXO set.
158 ///
159 /// # Arguments
160 ///
161 /// * `utxo_tree` - UTXO Merkle tree to update incrementally
162 /// * `checkpoint_height` - Starting height (checkpoint)
163 /// * `current_tip` - Ending height (current chain tip)
164 /// * `network_client` - Network client for requesting blocks
165 /// * `get_block_hash` - Function to get block hash for a given height
166 /// * `peer_id` - Peer ID to request blocks from
167 /// * `network` - Network type (Mainnet, Testnet, Regtest)
168 /// * `network_time` - Current network time (Unix timestamp)
169 /// * `recent_headers` - Recent block headers for median time-past calculation
170 /// * `checkpoint_utxo_set` - Full UTXO set at checkpoint (required for validation)
171 /// If None, starts with empty set (cannot verify checkpoint commitment until end)
172 ///
173 /// # Implementation
174 ///
175 /// 1. Requests FULL blocks from checkpoint+1 to tip
176 /// 2. For each full block:
177 /// - Fully validates block with connect_block() (signatures, scripts, all consensus rules)
178 /// - Updates UTXO set from validated result
179 /// - Updates UTXO tree from validated UTXO set
180 /// - Verifies commitment matches computed root
181 /// 3. Updates UTXO tree incrementally after validation
182 ///
183 /// **Security**: All transactions are cryptographically verified before UTXO set update.
184 pub async fn complete_sync_from_checkpoint<C, F, Fut>(
185 &self,
186 utxo_tree: &mut UtxoMerkleTree,
187 checkpoint_height: Natural,
188 current_tip: Natural,
189 network_client: &C,
190 get_block_hash: F,
191 peer_id: &str,
192 network: crate::types::Network,
193 network_time: u64,
194 recent_headers: Option<&[BlockHeader]>,
195 checkpoint_utxo_set: Option<UtxoSet>,
196 ) -> UtxoCommitmentResult<()>
197 where
198 C: UtxoCommitmentsNetworkClient,
199 F: Fn(Natural) -> Fut,
200 Fut: std::future::Future<Output = UtxoCommitmentResult<HashType>>,
201 {
202 use crate::block::connect_block;
203
204 // Start with checkpoint UTXO set, or empty if not provided
205 // Note: If empty, we cannot verify checkpoint commitment until we've built the full set
206 let mut utxo_set: UtxoSet = checkpoint_utxo_set.unwrap_or_default();
207
208 // Process blocks incrementally from checkpoint+1 to current tip
209 for height in checkpoint_height + 1..=current_tip {
210 // Get block hash for this height
211 let block_hash = get_block_hash(height).await?;
212
213 // Request FULL block (not filtered) from network peer
214 // This includes all transactions with witnesses for complete validation
215 let full_block = network_client
216 .request_full_block(peer_id, block_hash)
217 .await?;
218
219 // Verify block header height matches expected height
220 if full_block.block.header.timestamp == 0 {
221 return Err(UtxoCommitmentError::VerificationFailed(format!(
222 "Invalid block header at height {}",
223 height
224 )));
225 }
226
227 let context = crate::block::block_validation_context_for_connect_ibd(
228 recent_headers,
229 network_time,
230 network,
231 );
232 let (validation_result, new_utxo_set, _undo_log) = connect_block(
233 &full_block.block,
234 &full_block.witnesses,
235 utxo_set.clone(),
236 height,
237 &context,
238 )
239 .map_err(|e| {
240 UtxoCommitmentError::VerificationFailed(format!(
241 "connect_block failed at height {}: {}",
242 height, e
243 ))
244 })?;
245
246 // Reject if validation failed
247 if !matches!(
248 validation_result,
249 blvm_consensus::types::ValidationResult::Valid
250 ) {
251 return Err(UtxoCommitmentError::VerificationFailed(format!(
252 "Block validation failed at height {}: {:?}",
253 height, validation_result
254 )));
255 }
256
257 // Update UTXO tree from validated UTXO set
258 // This ensures the tree matches the cryptographically verified state
259 // Pass old utxo_set to detect removals efficiently
260 let old_utxo_set = utxo_set.clone();
261 utxo_tree.update_from_utxo_set(&new_utxo_set, &old_utxo_set)?;
262
263 // Generate commitment from validated UTXO tree
264 let computed_block_hash = compute_block_hash(&full_block.block.header);
265 let computed_commitment = utxo_tree.generate_commitment(computed_block_hash, height);
266
267 // Verify commitment supply matches expected
268 use blvm_consensus::economic::total_supply;
269 let expected_supply = total_supply(height) as u64;
270 if computed_commitment.total_supply != expected_supply {
271 return Err(UtxoCommitmentError::VerificationFailed(format!(
272 "Supply mismatch at height {}: computed {}, expected {}",
273 height, computed_commitment.total_supply, expected_supply
274 )));
275 }
276
277 // Note: Commitment root, height, and block hash are verified implicitly
278 // by the fact that we computed them from the validated UTXO set
279
280 // Update utxo_set for next iteration
281 utxo_set = new_utxo_set;
282 }
283
284 Ok(())
285 }
286
287 /// Process a filtered block and update UTXO set
288 ///
289 /// Takes a block with transactions (already filtered or to be filtered),
290 /// applies spam filter, updates UTXO set, and verifies commitment.
291 ///
292 /// **Critical**: This function processes ALL transactions to remove spent inputs,
293 /// but only adds non-spam outputs to the UTXO tree. This ensures UTXO set consistency:
294 /// - Spam transactions that spend non-spam inputs will still remove those inputs
295 /// - Only non-spam outputs are added to the tree (bandwidth savings)
296 /// - UTXO set remains consistent with actual blockchain state
297 ///
298 /// Note: This function applies transactions to the UTXO tree for commitment
299 /// purposes. Full signature verification should be done during block validation
300 /// before calling this function. This function assumes transactions are already
301 /// validated.
302 pub fn process_filtered_block(
303 &self,
304 utxo_tree: &mut UtxoMerkleTree,
305 block_height: Natural,
306 block_transactions: &[Transaction],
307 ) -> UtxoCommitmentResult<(SpamSummary, HashType)> {
308 use blvm_consensus::transaction::is_coinbase;
309
310 let mut spam_summary = SpamSummary {
311 filtered_count: 0,
312 filtered_size: 0,
313 by_type: SpamBreakdown::default(),
314 };
315
316 // Process ALL transactions (including spam) to remove spent inputs
317 // This is critical for UTXO set consistency: even spam transactions must
318 // remove their spent inputs from the tree.
319 for tx in block_transactions {
320 // Check if transaction is spam (for output filtering)
321 let spam_result = self.spam_filter.is_spam(tx);
322 let is_spam = spam_result.is_spam;
323
324 // Update spam summary
325 if is_spam {
326 spam_summary.filtered_count += 1;
327 // Estimate transaction size (simplified calculation)
328 let tx_size = 4 + 1 + 1 + 4 + // version + input_count + output_count + locktime
329 (tx.inputs.len() as u64 * 150) + // inputs
330 tx.outputs.iter().map(|out| 8 + out.script_pubkey.len() as u64).sum::<u64>(); // outputs
331 spam_summary.filtered_size += tx_size;
332
333 // Update breakdown
334 for spam_type in &spam_result.detected_types {
335 match spam_type {
336 SpamType::Ordinals => {
337 spam_summary.by_type.ordinals += 1;
338 }
339 SpamType::Dust => {
340 spam_summary.by_type.dust += 1;
341 }
342 SpamType::BRC20 => {
343 spam_summary.by_type.brc20 += 1;
344 }
345 SpamType::LargeWitness => {
346 spam_summary.by_type.ordinals += 1; // Count as Ordinals
347 }
348 SpamType::LowFeeRate => {
349 spam_summary.by_type.dust += 1; // Count as suspicious
350 }
351 SpamType::HighSizeValueRatio => {
352 spam_summary.by_type.ordinals += 1; // Count as Ordinals
353 }
354 SpamType::ManySmallOutputs => {
355 spam_summary.by_type.dust += 1; // Count as dust-like
356 }
357 SpamType::NotSpam => {}
358 }
359 }
360 }
361
362 // Compute transaction ID for proper outpoint creation
363 let tx_id = compute_tx_id(tx);
364
365 // CRITICAL: Remove spent inputs from ALL transactions (including spam)
366 // This ensures UTXO set consistency even when spam transactions spend non-spam inputs
367 if !is_coinbase(tx) {
368 for input in &tx.inputs {
369 // Get the UTXO first (needed for remove to update tracking)
370 match utxo_tree.get(&input.prevout) {
371 Ok(Some(utxo)) => {
372 // Remove the UTXO (even if transaction is spam)
373 if let Err(e) = utxo_tree.remove(&input.prevout, &utxo) {
374 return Err(UtxoCommitmentError::TransactionApplication(format!(
375 "Failed to remove spent input: {:?}",
376 e
377 )));
378 }
379 }
380 Ok(None) => {
381 // UTXO doesn't exist - this might be valid if it was already spent
382 // or invalid if the transaction wasn't properly validated
383 // Continue but log - this should be validated before calling
384 }
385 Err(e) => {
386 return Err(UtxoCommitmentError::TransactionApplication(format!(
387 "Failed to get UTXO for removal: {:?}",
388 e
389 )));
390 }
391 }
392 }
393 }
394
395 // Only add outputs from non-spam transactions
396 // This provides bandwidth savings while maintaining UTXO set consistency
397 if !is_spam {
398 for (i, output) in tx.outputs.iter().enumerate() {
399 let outpoint = OutPoint {
400 hash: tx_id,
401 index: i as u32,
402 };
403
404 let utxo = UTXO {
405 value: output.value,
406 script_pubkey: output.script_pubkey.as_slice().into(),
407 height: block_height,
408 is_coinbase: is_coinbase(tx),
409 };
410
411 if let Err(e) = utxo_tree.insert(outpoint, utxo) {
412 return Err(UtxoCommitmentError::TransactionApplication(format!(
413 "Failed to add output: {:?}",
414 e
415 )));
416 }
417 }
418 }
419 // Spam transaction outputs are skipped (not added to tree)
420 }
421
422 // Return summary and new root
423 let root = utxo_tree.root();
424
425 Ok((spam_summary, root))
426 }
427}
428
429/// Update UTXO commitments after block connection
430///
431/// This function should be called after successfully connecting a block
432/// to keep UTXO commitments synchronized with the blockchain state.
433///
434/// # Arguments
435///
436/// * `utxo_tree` - Mutable reference to the UTXO Merkle tree
437/// * `block` - The block that was just connected
438/// * `block_height` - Height of the connected block
439/// * `spam_filter` - Optional spam filter (if None, all transactions are included)
440///
441/// # Returns
442///
443/// Returns the new Merkle root hash of the UTXO tree.
444///
445/// # Example
446///
447/// ```rust
448/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
449/// use blvm_consensus::block::connect_block;
450/// use blvm_protocol::utxo_commitments::{UtxoMerkleTree, update_commitments_after_block};
451/// use blvm_consensus::spam_filter::SpamFilter;
452/// use blvm_consensus::types::{Network, ValidationResult};
453///
454/// # let block = blvm_consensus::types::Block {
455/// # header: blvm_consensus::types::BlockHeader {
456/// # version: 1, prev_block_hash: [0; 32], merkle_root: [0; 32],
457/// # timestamp: 1234567890, bits: 0x1d00ffff, nonce: 0,
458/// # },
459/// # transactions: vec![].into(),
460/// # };
461/// # let witnesses = vec![];
462/// # let utxo_set = blvm_consensus::types::UtxoSet::new();
463/// # let height = 0;
464/// # let mut utxo_tree = UtxoMerkleTree::new()?;
465/// let (result, new_utxo_set, _) = connect_block(&block, &witnesses, utxo_set, height, None, Network::Regtest)?;
466/// if matches!(result, ValidationResult::Valid) {
467/// let spam_filter = SpamFilter::new();
468/// let root = update_commitments_after_block(
469/// &mut utxo_tree,
470/// &block,
471/// height,
472/// Some(&spam_filter),
473/// )?;
474/// println!("New UTXO commitment root: {:?}", root);
475/// }
476/// # Ok(())
477/// # }
478/// ```
479#[cfg(feature = "utxo-commitments")]
480pub fn update_commitments_after_block(
481 utxo_tree: &mut UtxoMerkleTree,
482 block: &crate::types::Block,
483 block_height: Natural,
484 spam_filter: Option<&SpamFilter>,
485) -> UtxoCommitmentResult<HashType> {
486 use blvm_consensus::block::calculate_tx_id;
487 use blvm_consensus::transaction::is_coinbase;
488
489 // If spam filter is provided, use filtered processing
490 if let Some(filter) = spam_filter {
491 let initial_sync = InitialSync {
492 peer_consensus: crate::utxo_commitments::peer_consensus::PeerConsensus::new(
493 crate::utxo_commitments::peer_consensus::ConsensusConfig::default(),
494 ),
495 spam_filter: filter.clone(),
496 };
497 let (_, root) =
498 initial_sync.process_filtered_block(utxo_tree, block_height, &block.transactions)?;
499 Ok(root)
500 } else {
501 // No spam filter: process all transactions normally
502 for tx in &block.transactions {
503 let tx_id = calculate_tx_id(tx);
504
505 // Remove spent inputs (except coinbase)
506 if !is_coinbase(tx) {
507 for input in &tx.inputs {
508 // Get the UTXO first (needed for remove)
509 match utxo_tree.get(&input.prevout) {
510 Ok(Some(utxo)) => {
511 utxo_tree.remove(&input.prevout, &utxo)?;
512 }
513 Ok(None) => {
514 // UTXO doesn't exist - might be invalid or already spent
515 // Continue but this should have been caught during validation
516 }
517 Err(e) => {
518 return Err(UtxoCommitmentError::TransactionApplication(format!(
519 "Failed to get UTXO for removal: {:?}",
520 e
521 )));
522 }
523 }
524 }
525 }
526
527 // Add new outputs
528 for (i, output) in tx.outputs.iter().enumerate() {
529 let outpoint = blvm_consensus::types::OutPoint {
530 hash: tx_id,
531 index: i as u32,
532 };
533
534 let utxo = blvm_consensus::types::UTXO {
535 value: output.value,
536 script_pubkey: output.script_pubkey.as_slice().into(),
537 height: block_height,
538 is_coinbase: is_coinbase(tx),
539 };
540
541 utxo_tree.insert(outpoint, utxo)?;
542 }
543 }
544
545 Ok(utxo_tree.root())
546 }
547}
548
549/// Compute transaction ID (txid) using Bitcoin's standard double SHA256
550///
551/// Transaction ID is computed as: SHA256(SHA256(serialized_tx))
552/// where serialized_tx is the transaction in Bitcoin wire format (non-SegWit format).
553///
554/// Note: For SegWit transactions, the txid still uses the non-witness serialization
555/// (witness data is excluded from txid calculation).
556///
557/// This matches consensus transaction ID computation exactly.
558fn compute_tx_id(tx: &Transaction) -> HashType {
559 use crate::serialization::transaction::serialize_transaction;
560 use sha2::{Digest, Sha256};
561
562 // Serialize transaction to Bitcoin wire format (non-SegWit format for txid)
563 let serialized = serialize_transaction(tx);
564
565 // Double SHA256 (Bitcoin standard for transaction IDs)
566 let first_hash = Sha256::digest(&serialized);
567 let second_hash = Sha256::digest(first_hash);
568
569 // Convert to HashType [u8; 32]
570 let mut txid = [0u8; 32];
571 txid.copy_from_slice(&second_hash);
572
573 txid
574}
575
576/// Compute block header hash (double SHA256)
577fn compute_block_hash(header: &BlockHeader) -> HashType {
578 use sha2::{Digest, Sha256};
579
580 let mut bytes = Vec::with_capacity(80);
581 bytes.extend_from_slice(&header.version.to_le_bytes());
582 bytes.extend_from_slice(&header.prev_block_hash);
583 bytes.extend_from_slice(&header.merkle_root);
584 bytes.extend_from_slice(&header.timestamp.to_le_bytes());
585 bytes.extend_from_slice(&header.bits.to_le_bytes());
586 bytes.extend_from_slice(&header.nonce.to_le_bytes());
587
588 let first_hash = Sha256::digest(&bytes);
589 let second_hash = Sha256::digest(&first_hash);
590
591 let mut hash = [0u8; 32];
592 hash.copy_from_slice(&second_hash);
593 hash
594}