blvm_protocol/spam_filter/
mod.rs

1//! Spam Filtering for UTXO Commitments
2//!
3//! Implements spam detection and filtering for Bitcoin transactions:
4//! - Ordinals/Inscriptions detection
5//! - Dust output filtering
6//! - BRC-20 pattern detection
7//! - Adaptive witness size thresholds based on script type
8//!
9//! This filter enables 40-60% bandwidth savings by skipping spam transactions
10//! during ongoing sync while maintaining consensus correctness.
11//!
12//! **Critical Design Note**: Spam filtering applies to OUTPUTS only, not entire transactions.
13//! When a spam transaction is processed:
14//! - Its spent INPUTS are still removed from the UTXO tree (maintains consistency)
15//! - Its OUTPUTS are filtered out (bandwidth savings)
16//!
17//! This ensures the UTXO tree remains consistent even when spam transactions spend
18//! non-spam inputs. The `process_filtered_block` function in `initial_sync.rs` implements
19//! this correctly by processing all transactions but only adding non-spam outputs.
20
21mod script_analyzer;
22
23pub use script_analyzer::{detect_input_script_type, ScriptType};
24
25use blvm_consensus::opcodes::*;
26use blvm_consensus::segwit::Witness;
27use blvm_consensus::types::{ByteString, Transaction, UtxoSet};
28use script_analyzer::TransactionType;
29use serde::{Deserialize, Serialize};
30
/// Default dust threshold: 546 satoshis (0.00000546 BTC).
pub const DEFAULT_DUST_THRESHOLD: i64 = 546;

/// Default minimum fee rate, in satoshis per vbyte.
///
/// A transaction paying less than this is considered suspicious.
pub const DEFAULT_MIN_FEE_RATE: u64 = 1;

/// Default maximum witness size, in bytes.
///
/// Witness stacks larger than this suggest data embedding.
pub const DEFAULT_MAX_WITNESS_SIZE: usize = 1_000;

/// Default maximum ratio of transaction size to transferred value, in bytes per satoshi.
///
/// Non-monetary transactions are often very large relative to the value they move.
pub const DEFAULT_MAX_SIZE_VALUE_RATIO: f64 = 1_000.0;
44
/// Named spam filter configurations for common use cases.
///
/// A preset is turned into a concrete [`SpamFilterConfig`] with
/// [`SpamFilterPreset::to_config`].
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum SpamFilterPreset {
    /// Filtering switched off: every transaction passes.
    Disabled,
    /// Lenient filtering aimed at minimal false positives.
    ///
    /// Uses higher thresholds and targets only obvious spam patterns.
    Conservative,
    /// Balanced filtering; identical to the default configuration.
    ///
    /// Uses the standard thresholds with comprehensive detection.
    Moderate,
    /// Strong filtering that may produce false positives.
    ///
    /// Uses lower thresholds and more aggressive detection.
    Aggressive,
    /// Research/strict mode that keeps Ordinals and LargeWitness apart.
    ///
    /// - `ordinals_strict_mode` is on, so Ordinals means envelope/pattern matches only
    ///   (no large-witness heuristic).
    /// - All spam categories stay enabled but are reported separately.
    /// - Intended to minimize false positives (Miniscript and vault transactions are not
    ///   misclassified as Ordinals).
    StrictInscriptions,
}
70
71impl SpamFilterPreset {
72    /// Convert preset to configuration
73    pub fn to_config(&self) -> SpamFilterConfig {
74        match self {
75            Self::Disabled => SpamFilterConfig {
76                filter_ordinals: false,
77                filter_dust: false,
78                filter_brc20: false,
79                filter_large_witness: false,
80                filter_low_fee_rate: false,
81                filter_high_size_value_ratio: false,
82                filter_many_small_outputs: false,
83                ..SpamFilterConfig::default()
84            },
85            Self::Conservative => SpamFilterConfig {
86                filter_ordinals: true,
87                filter_dust: true,
88                filter_brc20: true,
89                filter_large_witness: true,
90                filter_low_fee_rate: false,
91                filter_high_size_value_ratio: true,
92                filter_many_small_outputs: true,
93                max_witness_size: 2000,       // Higher threshold
94                max_size_value_ratio: 2000.0, // Higher ratio
95                max_small_outputs: 20,        // More lenient
96                ..SpamFilterConfig::default()
97            },
98            Self::Moderate => SpamFilterConfig::default(),
99            Self::Aggressive => SpamFilterConfig {
100                filter_ordinals: true,
101                filter_dust: true,
102                filter_brc20: true,
103                filter_large_witness: true,
104                filter_low_fee_rate: true, // Enable fee rate filtering
105                filter_high_size_value_ratio: true,
106                filter_many_small_outputs: true,
107                max_witness_size: 500,       // Lower threshold
108                max_size_value_ratio: 500.0, // Lower ratio
109                max_small_outputs: 5,        // More strict
110                min_fee_rate: 2,             // Higher fee rate requirement
111                ..SpamFilterConfig::default()
112            },
113            Self::StrictInscriptions => SpamFilterConfig {
114                filter_ordinals: true,
115                filter_dust: true,
116                filter_brc20: true,
117                filter_large_witness: true,
118                filter_low_fee_rate: false,
119                filter_high_size_value_ratio: true,
120                filter_many_small_outputs: true,
121                ordinals_strict_mode: true, // Envelope/pattern only; LargeWitness separate
122                max_witness_size: 1500,     // Slightly higher; LargeWitness is separate category
123                ..SpamFilterConfig::default()
124            },
125        }
126    }
127}
128
/// Category assigned to a transaction by the spam filter.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum SpamType {
    /// Ordinals/Inscriptions: data embedded in the witness or in a script.
    Ordinals,
    /// Dust: outputs worth less than the dust threshold (in satoshis).
    Dust,
    /// BRC-20 token transaction.
    BRC20,
    /// Large witness data, which suggests data embedded in the witness.
    LargeWitness,
    /// Low fee rate, which suggests non-monetary use.
    LowFeeRate,
    /// High size-to-value ratio: a large transaction moving little value.
    HighSizeValueRatio,
    /// Many small outputs, as is common in token/ordinal distribution.
    ManySmallOutputs,
    /// No spam pattern matched; the transaction is considered valid.
    NotSpam,
}
149
/// Witness size thresholds (in bytes) that vary with script type.
///
/// The values are conservative estimates drawn from typical transaction patterns and
/// are expected to be refined once real-world data has been collected.
#[derive(Clone, Debug)]
pub struct WitnessSizeThresholds {
    /// 95th-percentile witness size of a normal single-signature spend.
    pub normal_single_sig: usize,
    /// 95th-percentile witness size of a normal multi-signature spend (2-of-3).
    pub normal_multi_sig: usize,
    /// 95th-percentile witness size of a normal P2WSH spend.
    pub normal_p2wsh: usize,
    /// Size above which a witness is considered suspicious (the current default limit).
    pub suspicious_threshold: usize,
    /// Size above which a witness is considered definite spam (99.9th percentile).
    pub definitely_spam: usize,
}
167
168impl Default for WitnessSizeThresholds {
169    fn default() -> Self {
170        // These will be populated from real-world data collection
171        // For now, use conservative estimates
172        Self {
173            normal_single_sig: 200,
174            normal_multi_sig: 500,
175            normal_p2wsh: 800,
176            suspicious_threshold: 1000,
177            definitely_spam: 2000,
178        }
179    }
180}
181
/// Summary of the elements found in a single witness stack.
#[derive(Clone, Debug)]
pub struct WitnessElementAnalysis {
    /// Total witness size in bytes, varint overhead included.
    pub total_size: usize,
    /// How many elements the witness stack holds.
    pub element_count: usize,
    /// Count of large elements (more than 200 bytes).
    pub large_elements: usize,
    /// Count of medium elements (100 to 200 bytes).
    pub medium_elements: usize,
    /// Count of small elements (fewer than 100 bytes).
    pub small_elements: usize,
    /// Set when the stack has many medium elements, which suggests data splitting.
    pub suspicious_pattern: bool,
}
198
199/// Spam filter configuration
200#[derive(Debug, Clone)]
201pub struct SpamFilterConfig {
202    /// Filter Ordinals/Inscriptions
203    pub filter_ordinals: bool,
204    /// Filter dust outputs
205    pub filter_dust: bool,
206    /// Filter BRC-20 patterns
207    pub filter_brc20: bool,
208    /// Filter transactions with large witness data
209    pub filter_large_witness: bool,
210    /// Filter transactions with low fee rate
211    pub filter_low_fee_rate: bool,
212    /// Filter transactions with high size-to-value ratio
213    pub filter_high_size_value_ratio: bool,
214    /// Filter transactions with many small outputs
215    pub filter_many_small_outputs: bool,
216    /// Minimum output value to consider non-dust (satoshis)
217    pub dust_threshold: i64,
218    /// Minimum output value to include in filtered blocks (satoshis)
219    pub min_output_value: i64,
220    /// Minimum fee rate threshold (satoshis per vbyte)
221    pub min_fee_rate: u64,
222    /// Maximum witness size before flagging (bytes)
223    /// Note: This is now adaptive based on script type when `use_adaptive_thresholds` is enabled
224    pub max_witness_size: usize,
225    /// Maximum size-to-value ratio (bytes per satoshi)
226    pub max_size_value_ratio: f64,
227    /// Maximum number of small outputs before flagging
228    pub max_small_outputs: usize,
229
230    // NEW: Adaptive thresholds
231    /// Use adaptive witness size thresholds based on script type
232    /// Default: true (enables data-driven thresholds)
233    pub use_adaptive_thresholds: bool,
234    /// Adaptive threshold configuration
235    pub adaptive_thresholds: WitnessSizeThresholds,
236
237    // NEW: Taproot-specific options
238    /// Filter Taproot-specific spam patterns (control blocks, annexes)
239    /// Default: true
240    pub filter_taproot_spam: bool,
241    /// Maximum Taproot control block size (bytes)
242    /// Control blocks: 33 bytes base + 32 bytes per tree level
243    /// BIP-110 limits to 257 bytes (depth 7), we use 289 bytes (depth 8) for policy
244    /// Default: 289 bytes (allows depth 8, more lenient than BIP-110)
245    pub max_taproot_control_size: usize,
246    /// Reject Taproot annexes (last witness element starting with OP_RESERVED)
247    /// Default: true
248    pub reject_taproot_annexes: bool,
249
250    // NEW: Total witness size check
251    /// Filter transactions with large total witness size across all inputs
252    /// Default: false (disabled by default, can be aggressive)
253    pub filter_large_total_witness: bool,
254    /// Maximum total witness size across all inputs (bytes)
255    /// Default: 5000 bytes
256    pub max_total_witness_size: usize,
257
258    // NEW: Enhanced detection options
259    /// Use improved envelope protocol detection (checks for OP_ENDIF)
260    /// Default: true
261    pub use_improved_envelope_detection: bool,
262    /// Use JSON validation for BRC-20 detection (requires serde_json)
263    /// Default: true (if serde_json available)
264    pub use_json_validation_brc20: bool,
265
266    // NEW: Fee rate calculation options
267    /// Require UTXO set for fee rate calculation (reject if unavailable)
268    /// If false, falls back to heuristic when UTXO set unavailable
269    /// Default: false (use heuristic fallback)
270    pub require_utxo_for_fee_rate: bool,
271    /// Minimum fee rate for large transactions (satoshis per vbyte)
272    /// Transactions larger than large_tx_threshold_bytes require this fee rate
273    /// Default: 2 sat/vB (higher than standard 1 sat/vB)
274    pub min_fee_rate_large_tx: u64,
275    /// Large transaction threshold (bytes)
276    /// Transactions larger than this require min_fee_rate_large_tx
277    /// Default: 1000 bytes
278    pub large_tx_threshold_bytes: usize,
279
280    /// Ordinals detection mode: strict (envelope/pattern only) vs legacy (includes large witness heuristics)
281    /// When true: Ordinals = envelope protocol, ordinal output patterns, Taproot annex/control-block.
282    ///            Large witness alone is NOT Ordinals (handled by LargeWitness category).
283    /// When false: Legacy behavior - large witness and witness data patterns also trigger Ordinals.
284    /// Default: true (minimize false positives; Miniscript/vaults no longer misclassified as Ordinals)
285    pub ordinals_strict_mode: bool,
286}
287
288impl Default for SpamFilterConfig {
289    fn default() -> Self {
290        Self {
291            filter_ordinals: true,
292            filter_dust: true,
293            filter_brc20: true,
294            filter_large_witness: true,
295            filter_low_fee_rate: false, // Disabled by default (too aggressive)
296            filter_high_size_value_ratio: true,
297            filter_many_small_outputs: true,
298            dust_threshold: DEFAULT_DUST_THRESHOLD,
299            min_output_value: DEFAULT_DUST_THRESHOLD,
300            min_fee_rate: DEFAULT_MIN_FEE_RATE,
301            max_witness_size: DEFAULT_MAX_WITNESS_SIZE,
302            max_size_value_ratio: DEFAULT_MAX_SIZE_VALUE_RATIO,
303            max_small_outputs: 10, // Flag if more than 10 small outputs
304
305            // NEW: Adaptive thresholds
306            use_adaptive_thresholds: true, // Enable by default
307            adaptive_thresholds: WitnessSizeThresholds::default(),
308
309            // NEW defaults
310            filter_taproot_spam: true,
311            max_taproot_control_size: 289, // 33 + 32*8 (depth 8)
312            reject_taproot_annexes: true,
313            filter_large_total_witness: false, // Disabled by default (can be aggressive)
314            max_total_witness_size: 5000,
315            use_improved_envelope_detection: true,
316            use_json_validation_brc20: true,
317            require_utxo_for_fee_rate: false, // Use heuristic fallback
318            min_fee_rate_large_tx: 2,         // 2 sat/vB
319            large_tx_threshold_bytes: 1000,   // 1 KB
320            ordinals_strict_mode: true,       // Envelope/pattern only; LargeWitness is separate
321        }
322    }
323}
324
325/// Spam filter result
326#[derive(Debug, Clone)]
327pub struct SpamFilterResult {
328    /// Whether transaction is spam
329    pub is_spam: bool,
330    /// Primary spam type detected
331    pub spam_type: SpamType,
332    /// All detected spam types (transaction may match multiple)
333    pub detected_types: Vec<SpamType>,
334}
335
336/// Spam filter implementation
337#[derive(Clone)]
338pub struct SpamFilter {
339    config: SpamFilterConfig,
340    /// Reserved for script-type LRU; wired incrementally in hot paths.
341    #[cfg(feature = "production")]
342    #[allow(dead_code)]
343    pub(crate) script_type_cache: std::sync::Arc<std::sync::RwLock<lru::LruCache<u64, bool>>>,
344}
345
346impl SpamFilter {
347    /// Create a new spam filter with default configuration
348    pub fn new() -> Self {
349        Self {
350            config: SpamFilterConfig::default(),
351            #[cfg(feature = "production")]
352            script_type_cache: std::sync::Arc::new(std::sync::RwLock::new(lru::LruCache::new(
353                std::num::NonZeroUsize::new(10_000).unwrap(),
354            ))),
355        }
356    }
357
358    /// Create a new spam filter with custom configuration
359    pub fn with_config(config: SpamFilterConfig) -> Self {
360        Self {
361            config,
362            #[cfg(feature = "production")]
363            script_type_cache: std::sync::Arc::new(std::sync::RwLock::new(lru::LruCache::new(
364                std::num::NonZeroUsize::new(10_000).unwrap(),
365            ))),
366        }
367    }
368
369    /// Create a new spam filter with a preset configuration
370    ///
371    /// Presets provide easy-to-use configurations for common use cases:
372    /// - `Disabled`: No spam filtering
373    /// - `Conservative`: Lenient filtering, minimal false positives
374    /// - `Moderate`: Balanced filtering (default)
375    /// - `Aggressive`: Strong filtering, may have false positives
376    pub fn with_preset(preset: SpamFilterPreset) -> Self {
377        Self::with_config(preset.to_config())
378    }
379
380    /// Check if a transaction is spam (without witness data)
381    ///
382    /// This is the backward-compatible method. For better detection, use `is_spam_with_witness`.
383    pub fn is_spam(&self, tx: &Transaction) -> SpamFilterResult {
384        self.is_spam_with_witness(tx, None, None)
385    }
386
387    /// Check if a transaction is spam (with optional witness data and UTXO set)
388    ///
389    /// Witness data is required for detecting Taproot/SegWit-based Ordinals.
390    /// UTXO set is optional but improves fee rate calculation accuracy.
391    /// If witness data is not provided, detection will be less accurate.
392    pub fn is_spam_with_witness(
393        &self,
394        tx: &Transaction,
395        witnesses: Option<&[Witness]>,
396        utxo_set: Option<&UtxoSet>,
397    ) -> SpamFilterResult {
398        let mut detected_types = Vec::new();
399
400        // Check for Ordinals/Inscriptions (now with witness data support)
401        if self.config.filter_ordinals && self.detect_ordinals(tx, witnesses) {
402            detected_types.push(SpamType::Ordinals);
403        }
404
405        // Check for dust outputs
406        if self.config.filter_dust && self.detect_dust(tx) {
407            detected_types.push(SpamType::Dust);
408        }
409
410        // Check for BRC-20 patterns
411        if self.config.filter_brc20 && self.detect_brc20(tx) {
412            detected_types.push(SpamType::BRC20);
413        }
414
415        // Check for large witness data (now with adaptive thresholds)
416        if self.config.filter_large_witness && self.detect_large_witness(tx, witnesses) {
417            detected_types.push(SpamType::LargeWitness);
418        }
419
420        // Check for large total witness size (across all inputs)
421        if self.config.filter_large_total_witness && self.detect_large_total_witness(witnesses) {
422            detected_types.push(SpamType::LargeWitness);
423        }
424
425        // Check for low fee rate (requires fee calculation)
426        if self.config.filter_low_fee_rate && self.detect_low_fee_rate(tx, witnesses, utxo_set) {
427            detected_types.push(SpamType::LowFeeRate);
428        }
429
430        // Check for high size-to-value ratio
431        if self.config.filter_high_size_value_ratio
432            && self.detect_high_size_value_ratio(tx, witnesses)
433        {
434            detected_types.push(SpamType::HighSizeValueRatio);
435        }
436
437        // Check for many small outputs
438        if self.config.filter_many_small_outputs && self.detect_many_small_outputs(tx) {
439            detected_types.push(SpamType::ManySmallOutputs);
440        }
441
442        let is_spam = !detected_types.is_empty();
443        let spam_type = detected_types.first().cloned().unwrap_or(SpamType::NotSpam);
444
445        SpamFilterResult {
446            is_spam,
447            spam_type,
448            detected_types,
449        }
450    }
451
452    /// Filter a transaction based on spam detection
453    ///
454    /// Returns `Some(tx)` if transaction should be included (not spam),
455    /// or `None` if transaction should be filtered (spam).
456    pub fn filter_transaction(&self, tx: &Transaction) -> Option<Transaction> {
457        let result = self.is_spam(tx);
458        if result.is_spam {
459            None // Filter out spam
460        } else {
461            Some(tx.clone()) // Include non-spam
462        }
463    }
464    /// Detect Ordinals/Inscriptions in transaction
465    ///
466    /// Ordinals typically embed data in:
467    /// - Witness scripts (SegWit v0 or Taproot) - PRIMARY METHOD
468    /// - Script pubkey (OP_RETURN or data push)
469    /// - Envelope protocol patterns
470    fn detect_ordinals(&self, tx: &Transaction, witnesses: Option<&[Witness]>) -> bool {
471        // Check outputs for OP_RETURN or data pushes (common Ordinals pattern)
472        for output in &tx.outputs {
473            if self.has_ordinal_pattern(&output.script_pubkey) {
474                return true;
475            }
476        }
477
478        // Check inputs for envelope protocol in scriptSig
479        for input in &tx.inputs {
480            if self.has_envelope_pattern(&input.script_sig) {
481                return true;
482            }
483        }
484
485        // Check witness data (PRIMARY METHOD for Taproot/SegWit Ordinals)
486        if let Some(witnesses) = witnesses {
487            for (i, witness) in witnesses.iter().enumerate() {
488                if i >= tx.inputs.len() {
489                    break;
490                }
491
492                // Check for Taproot-specific spam patterns (annex, oversized control block)
493                if self.config.filter_taproot_spam {
494                    for output in &tx.outputs {
495                        if self.is_taproot_output(&output.script_pubkey)
496                            && self.detect_taproot_spam(output, witness)
497                        {
498                            return true;
499                        }
500                    }
501                }
502
503                // Envelope protocol in witness (inscription format: OP_0 OP_IF ... OP_ENDIF)
504                if self.has_envelope_in_witness(witness) {
505                    return true;
506                }
507
508                // Legacy mode: large witness and data patterns also trigger Ordinals
509                // Strict mode: LargeWitness is handled separately by detect_large_witness
510                if !self.config.ordinals_strict_mode {
511                    if self.config.use_adaptive_thresholds {
512                        if self.has_large_witness_stack_adaptive(witness, tx, i) {
513                            return true;
514                        }
515                    } else if self.has_large_witness_stack(witness) {
516                        return true;
517                    }
518                    if self.has_witness_data_pattern(witness) {
519                        return true;
520                    }
521                }
522            }
523        }
524
525        false
526    }
527
528    /// Check if witness contains envelope protocol (OP_0 OP_IF ... OP_ENDIF)
529    /// Inscriptions embed data using this pattern in Taproot script-path witness.
530    fn has_envelope_in_witness(&self, witness: &Witness) -> bool {
531        for element in witness {
532            if element.len() >= 4 && element[0] == OP_0 && element[1] == OP_IF {
533                if self.config.use_improved_envelope_detection {
534                    if element.iter().skip(2).any(|&b| b == OP_ENDIF) {
535                        return true;
536                    }
537                } else {
538                    return true;
539                }
540            }
541        }
542        false
543    }
544
545    /// Check if output is Taproot (P2TR)
546    ///
547    /// P2TR format: OP_1 + PUSH_32_BYTES + 32-byte x-only pubkey = 34 bytes
548    fn is_taproot_output(&self, script_pubkey: &ByteString) -> bool {
549        // P2TR: OP_1 + PUSH_32_BYTES + 32-byte x-only pubkey = 34 bytes
550        script_pubkey.len() == 34 && script_pubkey[0] == OP_1 && script_pubkey[1] == PUSH_32_BYTES
551    }
552
553    /// Detect Taproot-specific spam patterns
554    ///
555    /// Checks for:
556    /// - Taproot annexes (last witness element starting with OP_RESERVED)
557    /// - Large control blocks (script path spends with deep trees)
558    fn detect_taproot_spam(
559        &self,
560        output: &blvm_consensus::types::TransactionOutput,
561        witness: &Witness,
562    ) -> bool {
563        if !self.is_taproot_output(&output.script_pubkey) {
564            return false;
565        }
566
567        // Check for annex (last witness element starting with OP_RESERVED)
568        // BIP-341: Annex is the last witness element if it starts with 0x50
569        if self.config.reject_taproot_annexes {
570            if let Some(last) = witness.last() {
571                if !last.is_empty() && last[0] == blvm_consensus::opcodes::OP_RESERVED {
572                    // Annex detected - BIP-110 invalidates these
573                    return true;
574                }
575            }
576        }
577
578        // Check for large control blocks (script path spends)
579        // Control blocks are typically the last element in Taproot script path spends
580        // Format: 33 + 32*n bytes (where n is tree depth)
581        // Large control blocks suggest deep trees (potential data embedding)
582        if witness.len() >= 2 {
583            // Script path spend: script + control block + witness items
584            // Control block is typically the last element
585            if let Some(control_block) = witness.last() {
586                // Control block: 33 bytes base + 32 bytes per tree level
587                // TAPROOT_CONTROL_BASE_SIZE = 33, TAPROOT_CONTROL_NODE_SIZE = 32
588                // BIP-110 limits to TAPROOT_CONTROL_MAX_SIZE_REDUCED (257 bytes, depth 7)
589                // For policy, we use a configurable threshold (289 bytes, depth 8)
590                if control_block.len() > self.config.max_taproot_control_size {
591                    return true;
592                }
593            }
594        }
595
596        false
597    }
598
599    /// Check if witness stack is suspiciously large (suggests data embedding)
600    ///
601    /// Uses adaptive thresholds based on script type if enabled.
602    fn has_large_witness_stack(&self, witness: &Witness) -> bool {
603        let total_size = self.calculate_witness_size(witness);
604        total_size > self.config.max_witness_size
605    }
606
607    /// Check if witness stack is suspiciously large using adaptive thresholds
608    ///
609    /// This method uses script type detection to apply appropriate thresholds.
610    /// Falls back to fixed threshold if adaptive thresholds are disabled or script type cannot be determined.
611    fn has_large_witness_stack_adaptive(
612        &self,
613        witness: &Witness,
614        tx: &Transaction,
615        input_index: usize,
616    ) -> bool {
617        let total_size = self.calculate_witness_size(witness);
618
619        // If adaptive thresholds disabled, use fixed threshold
620        if !self.config.use_adaptive_thresholds {
621            return total_size > self.config.max_witness_size;
622        }
623
624        // Prefer script type from this input's scriptSig when we can infer it (SegWit often
625        // uses an empty scriptSig; see `detect_input_script_type`). Otherwise fall back to
626        // scanning outputs — a simplified heuristic until prevout mapping is available.
627        let mut detected_script_type: Option<ScriptType> = None;
628        if input_index < tx.inputs.len() {
629            detected_script_type = detect_input_script_type(&tx.inputs[input_index].script_sig);
630        }
631        if detected_script_type.is_none() {
632            for output in &tx.outputs {
633                let script_type = ScriptType::detect(&output.script_pubkey);
634                if script_type != ScriptType::Unknown {
635                    detected_script_type = Some(script_type);
636                    break;
637                }
638            }
639        }
640
641        // Get threshold based on script type
642        let threshold = if let Some(script_type) = detected_script_type {
643            script_type.recommended_threshold()
644        } else {
645            // Fallback to fixed threshold if script type unknown
646            self.config.max_witness_size
647        };
648
649        total_size > threshold
650    }
651
652    /// Analyze witness elements for suspicious patterns
653    ///
654    /// Detects data splitting patterns (many medium-sized elements).
655    fn analyze_witness_elements(&self, witness: &Witness) -> WitnessElementAnalysis {
656        let total_size = self.calculate_witness_size(witness);
657        let element_count = witness.len();
658
659        let mut large_elements = 0;
660        let mut medium_elements = 0;
661        let mut small_elements = 0;
662
663        for element in witness {
664            if element.len() > 200 {
665                large_elements += 1;
666            } else if element.len() >= 100 {
667                medium_elements += 1;
668            } else {
669                small_elements += 1;
670            }
671        }
672
673        // Suspicious pattern: many medium elements (suggests data splitting)
674        let suspicious_pattern = medium_elements >= 10;
675
676        WitnessElementAnalysis {
677            total_size,
678            element_count,
679            large_elements,
680            medium_elements,
681            small_elements,
682            suspicious_pattern,
683        }
684    }
685
686    /// Calculate accurate witness size including varint overhead
687    ///
688    /// Witness size includes:
689    /// - Stack count varint (1 byte typically for small stacks)
690    /// - For each element: length varint (1-9 bytes) + element data
691    ///
692    /// This matches the actual serialized size of witness data in Bitcoin transactions.
693    fn calculate_witness_size(&self, witness: &Witness) -> usize {
694        // Stack count varint (typically 1 byte for small stacks)
695        let mut size = 1;
696
697        // Each element: length varint + element data
698        for element in witness {
699            // Varint encoding: 1 byte for <128, 2 for <16384, etc.
700            // Bitcoin varint encoding: values < 0xfd use 1 byte, larger values use prefix + data
701            // For witness element lengths, we use compact size encoding:
702            // - < 0xfd: 1 byte
703            // - 0xfd-0xffff: 0xfd prefix (1 byte) + 2 bytes data
704            // - 0x10000-0xffffffff: 0xfe prefix (1 byte) + 4 bytes data
705            // - > 0xffffffff: 0xff prefix (1 byte) + 8 bytes data
706            size += if element.len() <= VARINT_1BYTE_MAX as usize {
707                1
708            } else if element.len() <= 0xffff {
709                3 // VARINT_2BYTE_PREFIX + 2 bytes
710            } else if element.len() <= 0xffffffff {
711                5 // VARINT_4BYTE_PREFIX + 4 bytes
712            } else {
713                9 // VARINT_8BYTE_PREFIX + 8 bytes
714            };
715            size += element.len();
716        }
717
718        size
719    }
720
721    /// Check if witness contains data patterns (non-signature data)
722    fn has_witness_data_pattern(&self, witness: &Witness) -> bool {
723        if witness.is_empty() {
724            return false;
725        }
726
727        // Check for very large witness elements (>520 bytes is max for signatures)
728        // Elements larger than typical signature size suggest data embedding
729        for element in witness {
730            // Typical signatures are 71-73 bytes (DER-encoded) or 64 bytes (Schnorr)
731            // Witness elements >200 bytes are suspicious for data embedding
732            if element.len() > 200 {
733                // Check if it looks like data (not a signature)
734                // Signatures typically start with 0x30 (DER) or are exactly 64 bytes (Schnorr)
735                if element.len() != 64 && (element.is_empty() || element[0] != DER_SIGNATURE_PREFIX)
736                {
737                    // Likely data embedding
738                    return true;
739                }
740            }
741        }
742
743        // Check for multiple large elements (suggests data chunks)
744        let large_elements = witness.iter().filter(|elem| elem.len() > 100).count();
745        if large_elements >= 3 {
746            return true;
747        }
748
749        // Check for suspicious pattern (many medium elements - data splitting)
750        let analysis = self.analyze_witness_elements(witness);
751        if analysis.suspicious_pattern {
752            return true;
753        }
754
755        false
756    }
757
758    /// Check if script has Ordinals pattern
759    ///
760    /// Ordinals typically use:
761    /// - OP_RETURN followed by data (>80 bytes)
762    /// - Envelope protocol (OP_0 OP_IF ... OP_ENDIF) in output/scriptSig
763    fn has_ordinal_pattern(&self, script: &ByteString) -> bool {
764        if script.is_empty() {
765            return false;
766        }
767
768        // OP_RETURN >80 bytes (BIP-110 limit is 83; larger suggests data embedding)
769        if script[0] == OP_RETURN && script.len() > 80 {
770            return true;
771        }
772
773        // Envelope protocol in output or scriptSig
774        if self.has_envelope_pattern(script) {
775            return true;
776        }
777
778        false
779    }
780
781    /// Check if script has envelope protocol pattern
782    fn has_envelope_pattern(&self, script: &ByteString) -> bool {
783        // Envelope protocol: OP_FALSE OP_IF ... OP_ENDIF
784        if script.len() < 4 {
785            return false;
786        }
787
788        // Check for OP_FALSE OP_IF pattern (common in inscriptions)
789        if script[0] == OP_0 && script[1] == OP_IF {
790            if self.config.use_improved_envelope_detection {
791                // Improved: Verify OP_ENDIF exists later in script
792                // Envelope protocol: OP_FALSE OP_IF ... OP_ENDIF
793                if script.iter().skip(2).any(|&b| b == OP_ENDIF) {
794                    return true;
795                }
796            } else {
797                // Original simple check (backward compatibility)
798                return true;
799            }
800        }
801
802        false
803    }
804
805    /// Detect dust outputs
806    ///
807    /// Dust outputs are outputs with value below threshold (default: 546 satoshis).
808    fn detect_dust(&self, tx: &Transaction) -> bool {
809        // Check if all outputs are below threshold
810        let mut all_dust = true;
811
812        for output in &tx.outputs {
813            if output.value >= self.config.dust_threshold {
814                all_dust = false;
815                break;
816            }
817        }
818
819        all_dust && !tx.outputs.is_empty()
820    }
821
822    /// Detect transactions with large witness data
823    ///
824    /// Large witness stacks often indicate data embedding (Ordinals, inscriptions).
825    /// Now uses adaptive thresholds based on script type.
826    fn detect_large_witness(&self, tx: &Transaction, witnesses: Option<&[Witness]>) -> bool {
827        if let Some(witnesses) = witnesses {
828            for (i, witness) in witnesses.iter().enumerate() {
829                // Use adaptive thresholds if enabled
830                if self.config.use_adaptive_thresholds {
831                    if self.has_large_witness_stack_adaptive(witness, tx, i) {
832                        return true;
833                    }
834                } else if self.has_large_witness_stack(witness) {
835                    return true;
836                }
837            }
838        }
839        false
840    }
841
842    /// Detect transactions with low fee rate
843    ///
844    /// Non-monetary transactions often pay minimal fees relative to size.
845    /// Now accepts optional UTXO set for accurate fee calculation.
846    fn detect_low_fee_rate(
847        &self,
848        tx: &Transaction,
849        witnesses: Option<&[Witness]>,
850        utxo_set: Option<&UtxoSet>,
851    ) -> bool {
852        let tx_size = self.estimate_transaction_size_with_witness(tx, witnesses);
853
854        // If require_utxo_for_fee_rate is true and UTXO set unavailable, reject
855        if self.config.require_utxo_for_fee_rate && utxo_set.is_none() {
856            // Cannot calculate accurate fee rate, reject if strict mode enabled
857            return true; // Reject as spam (conservative)
858        }
859
860        // Calculate fee rate
861        let fee_rate = if let Some(utxo_set) = utxo_set {
862            // Accurate calculation with UTXO set
863            self.calculate_fee_rate_accurate(tx, utxo_set, tx_size)
864        } else {
865            // Fallback to heuristic when UTXO set unavailable
866            self.calculate_fee_rate_heuristic(tx, tx_size)
867        };
868
869        // Check against threshold (use large tx threshold if applicable)
870        let threshold = if tx_size > self.config.large_tx_threshold_bytes {
871            self.config.min_fee_rate_large_tx
872        } else {
873            self.config.min_fee_rate
874        };
875
876        fee_rate < threshold
877    }
878
879    /// Calculate fee rate accurately using UTXO set
880    fn calculate_fee_rate_accurate(
881        &self,
882        tx: &Transaction,
883        utxo_set: &UtxoSet,
884        tx_size: usize,
885    ) -> u64 {
886        if tx_size == 0 {
887            return 0;
888        }
889
890        // Calculate actual fee
891        let mut input_total = 0u64;
892        for input in &tx.inputs {
893            if let Some(utxo) = utxo_set.get(&input.prevout) {
894                input_total += utxo.value as u64;
895            }
896        }
897
898        let output_total: u64 = tx.outputs.iter().map(|out| out.value as u64).sum();
899        let fee = input_total.saturating_sub(output_total);
900
901        // Fee rate in satoshis per vbyte
902        if tx_size > 0 {
903            fee / tx_size as u64
904        } else {
905            0
906        }
907    }
908
909    /// Calculate fee rate using heuristics (fallback)
910    fn calculate_fee_rate_heuristic(&self, tx: &Transaction, tx_size: usize) -> u64 {
911        if tx_size == 0 {
912            return 0;
913        }
914
915        let total_output_value: i64 = tx.outputs.iter().map(|out| out.value).sum();
916
917        // Heuristic: large transactions with small output value likely have low fee rate
918        if tx_size > 1000 && total_output_value < 10000 {
919            // Assume minimal fee (1000 sats) for large transactions
920            1000u64.saturating_div(tx_size as u64)
921        } else {
922            // For other transactions, assume reasonable fee rate
923            // This is conservative - may have false negatives
924            self.config.min_fee_rate
925        }
926    }
927
928    /// Detect transactions with large total witness size across all inputs
929    fn detect_large_total_witness(&self, witnesses: Option<&[Witness]>) -> bool {
930        if !self.config.filter_large_total_witness {
931            return false; // Feature disabled
932        }
933
934        if let Some(witnesses) = witnesses {
935            let total_size: usize = witnesses
936                .iter()
937                .map(|w| self.calculate_witness_size(w))
938                .sum();
939
940            total_size > self.config.max_total_witness_size
941        } else {
942            false
943        }
944    }
945
946    /// Detect transactions with high size-to-value ratio
947    ///
948    /// Non-monetary transactions often have very large size relative to value transferred.
949    /// Now uses transaction type detection to adjust thresholds for legitimate transactions
950    /// (consolidations, CoinJoins) that legitimately have high ratios.
951    fn detect_high_size_value_ratio(
952        &self,
953        tx: &Transaction,
954        witnesses: Option<&[Witness]>,
955    ) -> bool {
956        let tx_size = self.estimate_transaction_size_with_witness(tx, witnesses) as f64;
957        let total_output_value: f64 = tx.outputs.iter().map(|out| out.value as f64).sum();
958
959        // Avoid division by zero
960        if total_output_value <= 0.0 {
961            // Transaction with zero outputs is suspicious
962            return tx_size > 1000.0;
963        }
964
965        let ratio = tx_size / total_output_value;
966
967        // Use transaction type to adjust threshold
968        let threshold = if self.config.use_adaptive_thresholds {
969            let tx_type = TransactionType::detect(tx);
970            tx_type.recommended_size_value_ratio()
971        } else {
972            self.config.max_size_value_ratio
973        };
974
975        ratio > threshold
976    }
977
978    /// Detect transactions with many small outputs
979    ///
980    /// Token distributions and Ordinal transfers often create many small outputs.
981    fn detect_many_small_outputs(&self, tx: &Transaction) -> bool {
982        let small_output_count = tx
983            .outputs
984            .iter()
985            .filter(|out| out.value < self.config.dust_threshold)
986            .count();
987
988        small_output_count > self.config.max_small_outputs
989    }
990
991    /// Estimate transaction size including witness data
992    fn estimate_transaction_size_with_witness(
993        &self,
994        tx: &Transaction,
995        witnesses: Option<&[Witness]>,
996    ) -> usize {
997        // Base transaction size (non-witness)
998        let base_size = estimate_transaction_size(tx) as usize;
999
1000        // Add witness size if available
1001        if let Some(witnesses) = witnesses {
1002            let witness_size: usize = witnesses
1003                .iter()
1004                .map(|witness| {
1005                    // Witness stack count (varint, ~1 byte)
1006                    let mut size = 1;
1007                    // Each witness element: length (varint, ~1 byte) + element data
1008                    for element in witness {
1009                        size += 1; // varint for length
1010                        size += element.len();
1011                    }
1012                    size
1013                })
1014                .sum();
1015
1016            // SegWit marker and flag (2 bytes)
1017            let has_witness = witness_size > 0;
1018            if has_witness {
1019                base_size + 2 + witness_size
1020            } else {
1021                base_size
1022            }
1023        } else {
1024            base_size
1025        }
1026    }
1027
1028    /// Detect BRC-20 token transactions
1029    ///
1030    /// BRC-20 transactions typically have:
1031    /// - OP_RETURN outputs with JSON data
1032    /// - Specific JSON patterns (mint, transfer, deploy)
1033    fn detect_brc20(&self, tx: &Transaction) -> bool {
1034        // Check outputs for OP_RETURN with JSON-like data
1035        for output in &tx.outputs {
1036            if self.has_brc20_pattern(&output.script_pubkey) {
1037                return true;
1038            }
1039        }
1040
1041        false
1042    }
1043
1044    /// Check if script has BRC-20 pattern
1045    ///
1046    /// BRC-20 transactions use OP_RETURN with JSON:
1047    /// - {"p":"brc-20","op":"mint",...}
1048    /// - {"p":"brc-20","op":"transfer",...}
1049    /// - {"p":"brc-20","op":"deploy",...}
1050    fn has_brc20_pattern(&self, script: &ByteString) -> bool {
1051        if script.len() < 20 {
1052            return false;
1053        }
1054
1055        // Check for OP_RETURN
1056        if script[0] != OP_RETURN {
1057            return false;
1058        }
1059
1060        // Extract data after OP_RETURN
1061        let data = &script[1..];
1062
1063        // Try to decode as UTF-8
1064        let script_str = match String::from_utf8(data.to_vec()) {
1065            Ok(s) => s,
1066            Err(_) => {
1067                // Not valid UTF-8, use simple pattern matching
1068                return self.has_brc20_pattern_simple(data);
1069            }
1070        };
1071
1072        // Use JSON validation if enabled
1073        if self.config.use_json_validation_brc20 {
1074            self.has_brc20_pattern_json(&script_str)
1075        } else {
1076            // Fallback to simple string matching
1077            self.has_brc20_pattern_simple(data)
1078        }
1079    }
1080
1081    /// Check for BRC-20 pattern using JSON validation
1082    fn has_brc20_pattern_json(&self, json_str: &str) -> bool {
1083        // Remove whitespace for more robust matching
1084        let cleaned: String = json_str.chars().filter(|c| !c.is_whitespace()).collect();
1085
1086        // Try to parse as JSON
1087        if let Ok(json_value) = serde_json::from_str::<serde_json::Value>(&cleaned) {
1088            // Check if it's a valid BRC-20 transaction
1089            if let Some(obj) = json_value.as_object() {
1090                // Check for protocol field: "p": "brc-20"
1091                if let Some(protocol) = obj.get("p") {
1092                    if protocol.as_str() == Some("brc-20") {
1093                        // Check for operation field: "op": "mint" | "transfer" | "deploy"
1094                        if let Some(op) = obj.get("op") {
1095                            if let Some(op_str) = op.as_str() {
1096                                return matches!(op_str, "mint" | "transfer" | "deploy");
1097                            }
1098                        }
1099                    }
1100                }
1101            }
1102        }
1103
1104        // Fallback: try parsing original string (with whitespace)
1105        if let Ok(json_value) = serde_json::from_str::<serde_json::Value>(json_str) {
1106            if let Some(obj) = json_value.as_object() {
1107                if let Some(protocol) = obj.get("p") {
1108                    if protocol.as_str() == Some("brc-20") {
1109                        if let Some(op) = obj.get("op") {
1110                            if let Some(op_str) = op.as_str() {
1111                                return matches!(op_str, "mint" | "transfer" | "deploy");
1112                            }
1113                        }
1114                    }
1115                }
1116            }
1117        }
1118
1119        false
1120    }
1121
1122    /// Check for BRC-20 pattern using simple string matching (fallback)
1123    fn has_brc20_pattern_simple(&self, data: &[u8]) -> bool {
1124        // Convert to string for pattern matching
1125        if let Ok(script_str) = String::from_utf8(data.to_vec()) {
1126            // Check for BRC-20 markers (case-insensitive)
1127            let lower = script_str.to_lowercase();
1128            lower.contains("brc-20")
1129                || lower.contains("\"p\":\"brc-20\"")
1130                || lower.contains("op\":\"mint")
1131                || lower.contains("op\":\"transfer")
1132                || lower.contains("op\":\"deploy")
1133        } else {
1134            // Not valid UTF-8, try byte pattern matching
1135            // Look for "brc-20" in bytes (case-insensitive)
1136            let pattern = b"brc-20";
1137            let pattern_lower = b"BRC-20";
1138            data.windows(pattern.len())
1139                .any(|window| window == pattern || window == pattern_lower)
1140        }
1141    }
1142
1143    /// Filter transactions from a block (without witness data)
1144    ///
1145    /// Returns filtered transactions (non-spam only) and summary of filtered spam.
1146    ///
1147    /// **Important**: This function filters entire transactions. For UTXO commitment processing,
1148    /// use `process_filtered_block` in `initial_sync.rs` which correctly handles spam
1149    /// transactions by removing spent inputs while filtering outputs.
1150    ///
1151    /// This function is primarily used for:
1152    /// - Bandwidth estimation (calculating filtered size)
1153    /// - Statistics and reporting
1154    /// - Network message filtering (where entire transactions can be dropped)
1155    ///
1156    /// **Do not use this for UTXO tree updates** - it will cause UTXO set inconsistency
1157    /// when spam transactions spend non-spam inputs.
1158    pub fn filter_block(&self, transactions: &[Transaction]) -> (Vec<Transaction>, SpamSummary) {
1159        self.filter_block_with_witness(transactions, None)
1160    }
1161
1162    /// Filter transactions from a block (with optional witness data)
1163    ///
1164    /// Returns filtered transactions (non-spam only) and summary of filtered spam.
1165    /// Witness data improves detection accuracy for SegWit/Taproot-based spam.
1166    ///
1167    /// **Important**: This function filters entire transactions. For UTXO commitment processing,
1168    /// use `process_filtered_block` in `initial_sync.rs` which correctly handles spam
1169    /// transactions by removing spent inputs while filtering outputs.
1170    ///
1171    /// This function is primarily used for:
1172    /// - Bandwidth estimation (calculating filtered size)
1173    /// - Statistics and reporting
1174    /// - Network message filtering (where entire transactions can be dropped)
1175    ///
1176    /// **Do not use this for UTXO tree updates** - it will cause UTXO set inconsistency
1177    /// when spam transactions spend non-spam inputs.
1178    pub fn filter_block_with_witness(
1179        &self,
1180        transactions: &[Transaction],
1181        witnesses: Option<&[Vec<Witness>]>,
1182    ) -> (Vec<Transaction>, SpamSummary) {
1183        let mut filtered_txs = Vec::new();
1184        let mut filtered_count = 0u32;
1185        let mut filtered_size = 0u64;
1186        let mut spam_breakdown = SpamBreakdown::default();
1187
1188        for (i, tx) in transactions.iter().enumerate() {
1189            // Get witness data for this transaction if available
1190            let tx_witnesses = witnesses.and_then(|w| w.get(i));
1191
1192            let result = if let Some(tx_witnesses) = tx_witnesses {
1193                self.is_spam_with_witness(tx, Some(tx_witnesses), None)
1194            } else {
1195                self.is_spam(tx)
1196            };
1197
1198            if result.is_spam {
1199                filtered_count += 1;
1200                let tx_size = if let Some(tx_witnesses) = tx_witnesses {
1201                    self.estimate_transaction_size_with_witness(tx, Some(tx_witnesses)) as u64
1202                } else {
1203                    estimate_transaction_size(tx)
1204                };
1205                filtered_size += tx_size;
1206
1207                // Update breakdown
1208                for spam_type in &result.detected_types {
1209                    match spam_type {
1210                        SpamType::Ordinals => spam_breakdown.ordinals += 1,
1211                        SpamType::Dust => spam_breakdown.dust += 1,
1212                        SpamType::BRC20 => spam_breakdown.brc20 += 1,
1213                        SpamType::LargeWitness => spam_breakdown.ordinals += 1, // Count as Ordinals
1214                        SpamType::LowFeeRate => spam_breakdown.dust += 1, // Count as suspicious
1215                        SpamType::HighSizeValueRatio => spam_breakdown.ordinals += 1, // Count as Ordinals
1216                        SpamType::ManySmallOutputs => spam_breakdown.dust += 1, // Count as dust-like
1217                        SpamType::NotSpam => {}
1218                    }
1219                }
1220            } else {
1221                filtered_txs.push(tx.clone());
1222            }
1223        }
1224
1225        let summary = SpamSummary {
1226            filtered_count,
1227            filtered_size,
1228            by_type: spam_breakdown,
1229        };
1230
1231        (filtered_txs, summary)
1232    }
1233}
1234
1235impl Default for SpamFilter {
1236    fn default() -> Self {
1237        Self::new()
1238    }
1239}
1240
/// Summary of filtered spam
///
/// Produced by `filter_block` / `filter_block_with_witness` alongside the list of
/// transactions that were kept.
#[derive(Debug, Clone, Default)]
pub struct SpamSummary {
    /// Number of transactions filtered
    pub filtered_count: u32,
    /// Total size of filtered transactions (bytes, estimated)
    pub filtered_size: u64,
    /// Breakdown by spam type
    pub by_type: SpamBreakdown,
}
1251
/// Breakdown of spam by category
///
/// `filter_block_with_witness` bumps one counter per detected spam type of each filtered
/// transaction, so a single transaction can contribute to several counters.
#[derive(Debug, Clone, Default)]
pub struct SpamBreakdown {
    /// Incremented for the `Ordinals`, `LargeWitness` and `HighSizeValueRatio` spam types.
    pub ordinals: u32,
    /// Not incremented by `filter_block_with_witness`; stays at its default there.
    pub inscriptions: u32,
    /// Incremented for the `Dust`, `LowFeeRate` and `ManySmallOutputs` spam types.
    pub dust: u32,
    /// Incremented for the `BRC20` spam type.
    pub brc20: u32,
}
1260
1261/// Estimate transaction size in bytes
1262fn estimate_transaction_size(tx: &Transaction) -> u64 {
1263    // Simplified estimation:
1264    // - Version: 4 bytes
1265    // - Input count: varint (1-9 bytes, estimate 1)
1266    // - Per input: ~150 bytes (prevout + script + sequence)
1267    // - Output count: varint (1-9 bytes, estimate 1)
1268    // - Per output: ~35 bytes (value + script)
1269    // - Locktime: 4 bytes
1270
1271    let base_size: u64 = 4 + 1 + 1 + 4; // Version + input count + output count + locktime
1272    let input_size = tx.inputs.len() as u64 * 150;
1273    let output_size = tx
1274        .outputs
1275        .iter()
1276        .map(|out| 8 + out.script_pubkey.len() as u64)
1277        .sum::<u64>();
1278
1279    let total_size = base_size
1280        .checked_add(input_size)
1281        .and_then(|sum| sum.checked_add(output_size))
1282        .unwrap_or(u64::MAX); // Overflow protection
1283
1284    // Runtime assertion: Estimated size must be reasonable
1285    debug_assert!(
1286        total_size <= 1_000_000,
1287        "Transaction size estimate ({total_size}) must not exceed MAX_TX_SIZE (1MB)"
1288    );
1289
1290    total_size
1291}
1292
/// Serializable adaptive thresholds
///
/// Serde-facing mirror of `WitnessSizeThresholds` (the two convert into each other
/// field by field). Every field is optional on input and falls back to the default noted
/// on it.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct WitnessSizeThresholdsSerializable {
    /// Default: 200.
    #[serde(default = "default_normal_single_sig")]
    pub normal_single_sig: usize,
    /// Default: 500.
    #[serde(default = "default_normal_multi_sig")]
    pub normal_multi_sig: usize,
    /// Default: 800.
    #[serde(default = "default_normal_p2wsh")]
    pub normal_p2wsh: usize,
    /// Default: 1000.
    #[serde(default = "default_suspicious_threshold")]
    pub suspicious_threshold: usize,
    /// Default: 2000.
    #[serde(default = "default_definitely_spam")]
    pub definitely_spam: usize,
}
1307
1308impl From<WitnessSizeThresholdsSerializable> for WitnessSizeThresholds {
1309    fn from(serializable: WitnessSizeThresholdsSerializable) -> Self {
1310        WitnessSizeThresholds {
1311            normal_single_sig: serializable.normal_single_sig,
1312            normal_multi_sig: serializable.normal_multi_sig,
1313            normal_p2wsh: serializable.normal_p2wsh,
1314            suspicious_threshold: serializable.suspicious_threshold,
1315            definitely_spam: serializable.definitely_spam,
1316        }
1317    }
1318}
1319
1320impl From<WitnessSizeThresholds> for WitnessSizeThresholdsSerializable {
1321    fn from(thresholds: WitnessSizeThresholds) -> Self {
1322        WitnessSizeThresholdsSerializable {
1323            normal_single_sig: thresholds.normal_single_sig,
1324            normal_multi_sig: thresholds.normal_multi_sig,
1325            normal_p2wsh: thresholds.normal_p2wsh,
1326            suspicious_threshold: thresholds.suspicious_threshold,
1327            definitely_spam: thresholds.definitely_spam,
1328        }
1329    }
1330}
1331
/// Serializable spam filter configuration (for config files)
///
/// Serde-facing mirror of `SpamFilterConfig` (the two convert into each other field by
/// field). Every field may be omitted from the input, in which case the default noted on
/// the field is used.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct SpamFilterConfigSerializable {
    // --- Detector toggles ---
    /// Default: `true`.
    #[serde(default = "default_true")]
    pub filter_ordinals: bool,
    /// Default: `true`.
    #[serde(default = "default_true")]
    pub filter_dust: bool,
    /// Default: `true`.
    #[serde(default = "default_true")]
    pub filter_brc20: bool,
    /// Default: `true`.
    #[serde(default = "default_true")]
    pub filter_large_witness: bool,
    /// Default: `false`.
    #[serde(default = "default_false")]
    pub filter_low_fee_rate: bool,
    /// Default: `true`.
    #[serde(default = "default_true")]
    pub filter_high_size_value_ratio: bool,
    /// Default: `true`.
    #[serde(default = "default_true")]
    pub filter_many_small_outputs: bool,

    // --- Basic thresholds ---
    /// Outputs with a value strictly below this count as dust / small outputs.
    /// Default: 546.
    #[serde(default = "default_dust_threshold")]
    pub dust_threshold: i64,
    /// Shares the dust-threshold default (546).
    #[serde(default = "default_dust_threshold")]
    pub min_output_value: i64,
    /// Minimum fee rate for transactions that are not above `large_tx_threshold_bytes`.
    /// Default: 1.
    #[serde(default = "default_min_fee_rate")]
    pub min_fee_rate: u64,
    /// Default: 1000.
    #[serde(default = "default_max_witness_size")]
    pub max_witness_size: usize,
    /// Size-to-value ratio limit used when adaptive thresholds are disabled.
    /// Default: 1000.0.
    #[serde(default = "default_max_size_value_ratio")]
    pub max_size_value_ratio: f64,
    /// A transaction is flagged when it has more sub-dust outputs than this.
    /// Default: 10.
    #[serde(default = "default_max_small_outputs")]
    pub max_small_outputs: usize,

    // --- Adaptive thresholds ---
    /// Selects the adaptive witness-stack check and the per-transaction-type
    /// size-to-value limit instead of the fixed ones. Default: `true`.
    #[serde(default = "default_true")]
    pub use_adaptive_thresholds: bool,
    /// Default: 200 / 500 / 800 / 1000 / 2000 (see `default_adaptive_thresholds`).
    #[serde(default = "default_adaptive_thresholds")]
    pub adaptive_thresholds: WitnessSizeThresholdsSerializable,

    // --- Taproot-specific options ---
    /// Default: `true`.
    #[serde(default = "default_true")]
    pub filter_taproot_spam: bool,
    /// Default: 289.
    #[serde(default = "default_max_taproot_control_size")]
    pub max_taproot_control_size: usize,
    /// Default: `true`.
    #[serde(default = "default_true")]
    pub reject_taproot_annexes: bool,

    // --- Total witness size check ---
    /// Enables the check on the combined witness size of all inputs. Default: `false`.
    #[serde(default = "default_false")]
    pub filter_large_total_witness: bool,
    /// Combined witness size above which that check flags a transaction. Default: 5000.
    #[serde(default = "default_max_total_witness_size")]
    pub max_total_witness_size: usize,

    // --- Enhanced detection options ---
    /// Require an `OP_ENDIF` after the `OP_0 OP_IF` prefix before reporting an envelope.
    /// Default: `true`.
    #[serde(default = "default_true")]
    pub use_improved_envelope_detection: bool,
    /// Validate BRC-20 payloads by parsing JSON rather than substring matching.
    /// Default: `true`.
    #[serde(default = "default_true")]
    pub use_json_validation_brc20: bool,

    // --- Fee rate calculation options ---
    /// When `true`, the low-fee-rate check flags a transaction if no UTXO set is
    /// available. Default: `false`.
    #[serde(default = "default_false")]
    pub require_utxo_for_fee_rate: bool,
    /// Minimum fee rate for transactions above `large_tx_threshold_bytes`. Default: 2.
    #[serde(default = "default_min_fee_rate_large_tx")]
    pub min_fee_rate_large_tx: u64,
    /// Estimated size above which `min_fee_rate_large_tx` applies. Default: 1000.
    #[serde(default = "default_large_tx_threshold_bytes")]
    pub large_tx_threshold_bytes: usize,
    /// Default: `true`.
    #[serde(default = "default_true")]
    pub ordinals_strict_mode: bool,
}
1398
// Serde `default = "..."` providers. Each returns the value used when the corresponding
// configuration field is missing from the input.

/// Default for switches that are on unless configured otherwise.
fn default_true() -> bool {
    true
}

/// Default for switches that are off unless configured otherwise.
fn default_false() -> bool {
    false
}

/// Default dust threshold in satoshis.
fn default_dust_threshold() -> i64 {
    546
}

/// Default minimum fee rate.
fn default_min_fee_rate() -> u64 {
    1
}

/// Default maximum witness size.
fn default_max_witness_size() -> usize {
    1_000
}

/// Default maximum size-to-value ratio.
fn default_max_size_value_ratio() -> f64 {
    1_000.0
}

/// Default maximum number of small outputs.
fn default_max_small_outputs() -> usize {
    10
}

/// Default maximum Taproot control size.
fn default_max_taproot_control_size() -> usize {
    289
}

/// Default maximum total witness size.
fn default_max_total_witness_size() -> usize {
    5_000
}

/// Default minimum fee rate for large transactions.
fn default_min_fee_rate_large_tx() -> u64 {
    2
}

/// Default size above which a transaction counts as large.
fn default_large_tx_threshold_bytes() -> usize {
    1_000
}

/// Default `normal_single_sig` adaptive threshold.
fn default_normal_single_sig() -> usize {
    200
}

/// Default `normal_multi_sig` adaptive threshold.
fn default_normal_multi_sig() -> usize {
    500
}

/// Default `normal_p2wsh` adaptive threshold.
fn default_normal_p2wsh() -> usize {
    800
}

/// Default `suspicious_threshold` adaptive threshold.
fn default_suspicious_threshold() -> usize {
    1_000
}

/// Default `definitely_spam` adaptive threshold.
fn default_definitely_spam() -> usize {
    2_000
}
1462
1463fn default_adaptive_thresholds() -> WitnessSizeThresholdsSerializable {
1464    WitnessSizeThresholdsSerializable {
1465        normal_single_sig: 200,
1466        normal_multi_sig: 500,
1467        normal_p2wsh: 800,
1468        suspicious_threshold: 1000,
1469        definitely_spam: 2000,
1470    }
1471}
1472
1473impl Default for SpamFilterConfigSerializable {
1474    fn default() -> Self {
1475        Self {
1476            filter_ordinals: default_true(),
1477            filter_dust: default_true(),
1478            filter_brc20: default_true(),
1479            filter_large_witness: default_true(),
1480            filter_low_fee_rate: default_false(),
1481            filter_high_size_value_ratio: default_true(),
1482            filter_many_small_outputs: default_true(),
1483            dust_threshold: default_dust_threshold(),
1484            min_output_value: default_dust_threshold(),
1485            min_fee_rate: default_min_fee_rate(),
1486            max_witness_size: default_max_witness_size(),
1487            max_size_value_ratio: default_max_size_value_ratio(),
1488            max_small_outputs: default_max_small_outputs(),
1489            use_adaptive_thresholds: default_true(),
1490            adaptive_thresholds: default_adaptive_thresholds(),
1491            filter_taproot_spam: default_true(),
1492            max_taproot_control_size: default_max_taproot_control_size(),
1493            reject_taproot_annexes: default_true(),
1494            filter_large_total_witness: default_false(),
1495            max_total_witness_size: default_max_total_witness_size(),
1496            use_improved_envelope_detection: default_true(),
1497            use_json_validation_brc20: default_true(),
1498            require_utxo_for_fee_rate: default_false(),
1499            min_fee_rate_large_tx: default_min_fee_rate_large_tx(),
1500            large_tx_threshold_bytes: default_large_tx_threshold_bytes(),
1501            ordinals_strict_mode: default_true(),
1502        }
1503    }
1504}
1505
1506impl From<SpamFilterConfigSerializable> for SpamFilterConfig {
1507    fn from(serializable: SpamFilterConfigSerializable) -> Self {
1508        SpamFilterConfig {
1509            filter_ordinals: serializable.filter_ordinals,
1510            filter_dust: serializable.filter_dust,
1511            filter_brc20: serializable.filter_brc20,
1512            filter_large_witness: serializable.filter_large_witness,
1513            filter_low_fee_rate: serializable.filter_low_fee_rate,
1514            filter_high_size_value_ratio: serializable.filter_high_size_value_ratio,
1515            filter_many_small_outputs: serializable.filter_many_small_outputs,
1516            dust_threshold: serializable.dust_threshold,
1517            min_output_value: serializable.min_output_value,
1518            min_fee_rate: serializable.min_fee_rate,
1519            max_witness_size: serializable.max_witness_size,
1520            max_size_value_ratio: serializable.max_size_value_ratio,
1521            max_small_outputs: serializable.max_small_outputs,
1522            // NEW: Adaptive thresholds
1523            use_adaptive_thresholds: serializable.use_adaptive_thresholds,
1524            adaptive_thresholds: serializable.adaptive_thresholds.into(),
1525            // NEW fields
1526            filter_taproot_spam: serializable.filter_taproot_spam,
1527            max_taproot_control_size: serializable.max_taproot_control_size,
1528            reject_taproot_annexes: serializable.reject_taproot_annexes,
1529            filter_large_total_witness: serializable.filter_large_total_witness,
1530            max_total_witness_size: serializable.max_total_witness_size,
1531            use_improved_envelope_detection: serializable.use_improved_envelope_detection,
1532            use_json_validation_brc20: serializable.use_json_validation_brc20,
1533            require_utxo_for_fee_rate: serializable.require_utxo_for_fee_rate,
1534            min_fee_rate_large_tx: serializable.min_fee_rate_large_tx,
1535            large_tx_threshold_bytes: serializable.large_tx_threshold_bytes,
1536            ordinals_strict_mode: serializable.ordinals_strict_mode,
1537        }
1538    }
1539}
1540
1541impl From<SpamFilterConfig> for SpamFilterConfigSerializable {
1542    fn from(config: SpamFilterConfig) -> Self {
1543        SpamFilterConfigSerializable {
1544            filter_ordinals: config.filter_ordinals,
1545            filter_dust: config.filter_dust,
1546            filter_brc20: config.filter_brc20,
1547            filter_large_witness: config.filter_large_witness,
1548            filter_low_fee_rate: config.filter_low_fee_rate,
1549            filter_high_size_value_ratio: config.filter_high_size_value_ratio,
1550            filter_many_small_outputs: config.filter_many_small_outputs,
1551            dust_threshold: config.dust_threshold,
1552            min_output_value: config.min_output_value,
1553            min_fee_rate: config.min_fee_rate,
1554            max_witness_size: config.max_witness_size,
1555            max_size_value_ratio: config.max_size_value_ratio,
1556            max_small_outputs: config.max_small_outputs,
1557            // NEW: Adaptive thresholds
1558            use_adaptive_thresholds: config.use_adaptive_thresholds,
1559            adaptive_thresholds: config.adaptive_thresholds.into(),
1560            // NEW fields
1561            filter_taproot_spam: config.filter_taproot_spam,
1562            max_taproot_control_size: config.max_taproot_control_size,
1563            reject_taproot_annexes: config.reject_taproot_annexes,
1564            filter_large_total_witness: config.filter_large_total_witness,
1565            max_total_witness_size: config.max_total_witness_size,
1566            use_improved_envelope_detection: config.use_improved_envelope_detection,
1567            use_json_validation_brc20: config.use_json_validation_brc20,
1568            require_utxo_for_fee_rate: config.require_utxo_for_fee_rate,
1569            min_fee_rate_large_tx: config.min_fee_rate_large_tx,
1570            large_tx_threshold_bytes: config.large_tx_threshold_bytes,
1571            ordinals_strict_mode: config.ordinals_strict_mode,
1572        }
1573    }
1574}
blvm_protocol/spam_filter/mod.rs

blvm_protocol/spam_filter/
mod.rs