Skip to main content

blvm_protocol/
bip158.rs

1//! BIP158: Compact Block Filters for Light Client Discovery
2//!
3//! Specification: https://github.com/bitcoin/bips/blob/master/bip-0158.mediawiki
4//!
5//! Implements Golomb-Rice Coded Sets (GCS) for efficient block filtering.
6//! Allows light clients to determine if a block contains relevant transactions
7//! without downloading the entire block.
8
9use crate::Transaction;
10use sha2::{Digest, Sha256};
11use std::collections::HashSet;
12
13/// BIP158 filter parameter: false positive rate (2^(-P))
14/// P = 19 means ~1 in 524,288 false positives
15pub const BIP158_P: u8 = 19;
16
17/// BIP158 filter parameter: multiplier M = 2^P
18pub const BIP158_M: u64 = 1 << BIP158_P; // 2^19 = 524,288
19
20/// Compact block filter (GCS)
21#[derive(Debug, Clone, PartialEq, Eq)]
22pub struct CompactBlockFilter {
23    /// Golomb-Rice encoded filter data
24    pub filter_data: Vec<u8>,
25    /// Number of elements in the filter
26    pub num_elements: u32,
27}
28
29/// Hash a script to a number in range [0, N*M)
30fn hash_to_range(script: &[u8], n: u64, m: u64) -> u64 {
31    // Hash script with SHA256
32    let mut hasher = Sha256::new();
33    hasher.update(script);
34    let hash = hasher.finalize();
35
36    // Interpret first 8 bytes as u64 (little-endian)
37    let hash_value = u64::from_le_bytes([
38        hash[0], hash[1], hash[2], hash[3], hash[4], hash[5], hash[6], hash[7],
39    ]);
40
41    // Map to range [0, N*M)
42    hash_value % (n * m)
43}
44
45/// Bit writer for Golomb-Rice encoding
46struct BitWriter {
47    data: Vec<u8>,
48    current_byte: u8,
49    bit_count: u8,
50}
51
52impl BitWriter {
53    fn new() -> Self {
54        Self {
55            data: Vec::new(),
56            current_byte: 0,
57            bit_count: 0,
58        }
59    }
60
61    /// Write a single bit
62    fn write_bit(&mut self, bit: bool) {
63        if bit {
64            self.current_byte |= 1u8 << (7 - self.bit_count);
65        }
66        self.bit_count += 1;
67
68        if self.bit_count == 8 {
69            self.data.push(self.current_byte);
70            self.current_byte = 0;
71            self.bit_count = 0;
72        }
73    }
74
75    /// Write multiple bits (up to 64)
76    fn write_bits(&mut self, value: u64, num_bits: u8) {
77        for i in 0..num_bits {
78            let bit = ((value >> (num_bits - 1 - i)) & 1) != 0;
79            self.write_bit(bit);
80        }
81    }
82
83    /// Finish writing (flush remaining bits)
84    fn finish(mut self) -> Vec<u8> {
85        if self.bit_count > 0 {
86            self.data.push(self.current_byte);
87        }
88        self.data
89    }
90}
91
92/// Golomb-Rice encode a value
93///
94/// BIP158: Encode value x as:
95/// - Write (x / 2^P) in unary (that many 1s, then a 0)
96/// - Write (x mod 2^P) in binary (P bits)
97fn golomb_rice_encode(value: u64, p: u8) -> Vec<u8> {
98    let mut writer = BitWriter::new();
99
100    // Calculate quotient and remainder
101    let quotient = value >> p; // value / 2^P
102    let remainder = value & ((1u64 << p) - 1); // value mod 2^P
103
104    // Encode quotient in unary (quotient number of 1s, then a 0)
105    for _ in 0..quotient {
106        writer.write_bit(true);
107    }
108    writer.write_bit(false); // Terminate unary with 0
109
110    // Encode remainder in binary (P bits)
111    writer.write_bits(remainder, p);
112
113    writer.finish()
114}
115
116/// Bit reader for Golomb-Rice decoding
117struct BitReader<'a> {
118    data: &'a [u8],
119    bit_offset: usize,
120}
121
122impl<'a> BitReader<'a> {
123    fn new(data: &'a [u8]) -> Self {
124        Self {
125            data,
126            bit_offset: 0,
127        }
128    }
129
130    /// Read a single bit
131    fn read_bit(&mut self) -> Option<bool> {
132        if self.bit_offset >= self.data.len() * 8 {
133            return None;
134        }
135        let byte_idx = self.bit_offset / 8;
136        let bit_idx = self.bit_offset % 8;
137        let bit = (self.data[byte_idx] >> (7 - bit_idx)) & 1;
138        self.bit_offset += 1;
139        Some(bit == 1)
140    }
141
142    /// Read P bits as a u64
143    fn read_bits(&mut self, p: u8) -> Option<u64> {
144        let mut value = 0u64;
145        for _ in 0..p {
146            if let Some(bit) = self.read_bit() {
147                value = (value << 1) | (if bit { 1 } else { 0 });
148            } else {
149                return None;
150            }
151        }
152        Some(value)
153    }
154
155    /// Get current bit offset
156    #[allow(dead_code)]
157    fn bit_offset(&self) -> usize {
158        self.bit_offset
159    }
160}
161
162/// Golomb-Rice decode a value from stream
163///
164/// BIP158: Decode value x by:
165/// - Read unary-encoded quotient (count 1s until 0)
166/// - Read P bits as remainder
167/// - Value = quotient * 2^P + remainder
168fn golomb_rice_decode(reader: &mut BitReader, p: u8) -> Option<u64> {
169    // Read quotient in unary (count 1s until we hit a 0)
170    let mut quotient = 0u64;
171    loop {
172        match reader.read_bit() {
173            Some(true) => quotient += 1,
174            Some(false) => break,
175            None => return None,
176        }
177    }
178
179    // Read remainder in binary (P bits)
180    let remainder = reader.read_bits(p)?;
181
182    // Reconstruct value: quotient * 2^P + remainder
183    Some((quotient << p) | remainder)
184}
185
186/// Build a compact block filter from transaction data
187///
188/// BIP158: Filter contains:
189/// 1. All spendable output scriptPubKeys in the block
190/// 2. All scriptPubKeys from outputs spent by block's inputs
191pub fn build_block_filter(
192    block_transactions: &[Transaction],
193    previous_outpoint_scripts: &[Vec<u8>], // Scripts from UTXOs being spent
194) -> Result<CompactBlockFilter, String> {
195    let mut filter_set = HashSet::new();
196
197    // Add all scriptPubKeys from block outputs
198    for tx in block_transactions {
199        for output in &tx.outputs {
200            if !output.script_pubkey.is_empty() {
201                filter_set.insert(output.script_pubkey.clone());
202            }
203        }
204    }
205
206    // Add all scriptPubKeys from inputs (UTXOs being spent)
207    for script in previous_outpoint_scripts {
208        if !script.is_empty() {
209            filter_set.insert(script.clone());
210        }
211    }
212
213    // Convert to sorted vector of hashed values
214    let n = filter_set.len() as u64;
215    if n == 0 {
216        // Empty filter
217        return Ok(CompactBlockFilter {
218            filter_data: Vec::new(),
219            num_elements: 0,
220        });
221    }
222
223    // Hash each script to range [0, N*M)
224    let mut hashed_values: Vec<u64> = filter_set
225        .iter()
226        .map(|script| hash_to_range(script, n, BIP158_M))
227        .collect();
228
229    // Sort and remove duplicates
230    hashed_values.sort_unstable();
231    hashed_values.dedup();
232
233    // Update n after deduplication
234    let n_final = hashed_values.len() as u64;
235
236    // Compute differences between consecutive values
237    // First value is difference from 0
238    let mut differences = Vec::new();
239    if !hashed_values.is_empty() {
240        differences.push(hashed_values[0]);
241        for i in 1..hashed_values.len() {
242            let diff = hashed_values[i] - hashed_values[i - 1];
243            differences.push(diff);
244        }
245    }
246
247    // Encode differences using Golomb-Rice
248    let mut filter_data = Vec::new();
249    for diff in differences {
250        let encoded = golomb_rice_encode(diff, BIP158_P);
251        filter_data.extend_from_slice(&encoded);
252    }
253
254    Ok(CompactBlockFilter {
255        filter_data,
256        num_elements: n_final as u32,
257    })
258}
259
260/// Match a script against a compact block filter
261///
262/// Returns true if the script (or its hash) is likely in the filter
263///
264/// Algorithm:
265/// 1. Hash the script to get a value in range [0, N*M)
266/// 2. Decode the filter to reconstruct the sorted set of hashed values
267/// 3. Check if the script's hash value is in the set
268pub fn match_filter(filter: &CompactBlockFilter, script: &[u8]) -> bool {
269    if filter.num_elements == 0 {
270        return false;
271    }
272
273    let n = filter.num_elements as u64;
274
275    // Hash script to range [0, N*M)
276    let script_hash = hash_to_range(script, n, BIP158_M);
277
278    // Decode filter to reconstruct sorted set
279    let mut reader = BitReader::new(&filter.filter_data);
280    let mut decoded_values = Vec::new();
281    let mut current_value = 0u64;
282
283    // Decode all differences and reconstruct values
284    for _ in 0..filter.num_elements {
285        if let Some(diff) = golomb_rice_decode(&mut reader, BIP158_P) {
286            current_value += diff;
287            decoded_values.push(current_value);
288        } else {
289            // Decoding failed - filter may be corrupted
290            return false;
291        }
292    }
293
294    // Check if script_hash is in the decoded set
295    decoded_values.binary_search(&script_hash).is_ok()
296}
297
298#[cfg(test)]
299mod tests {
300    use super::*;
301
302    #[test]
303    fn test_hash_to_range() {
304        let script = b"test script";
305        let n = 100;
306        let m = BIP158_M;
307        let value = hash_to_range(script, n, m);
308        assert!(value < n * m);
309    }
310
311    #[test]
312    fn test_golomb_rice_encode() {
313        // Test encoding small value
314        let encoded = golomb_rice_encode(0, BIP158_P);
315        assert!(!encoded.is_empty());
316
317        let encoded2 = golomb_rice_encode(1, BIP158_P);
318        assert!(!encoded2.is_empty());
319    }
320
321    #[test]
322    fn test_empty_filter() {
323        let filter = build_block_filter(&[], &[]).unwrap();
324        assert_eq!(filter.num_elements, 0);
325        assert!(filter.filter_data.is_empty());
326    }
327
328    #[test]
329    fn test_golomb_rice_encode_decode_roundtrip() {
330        let test_values = vec![0, 1, 2, 10, 100, 1000, 10000];
331
332        for value in test_values {
333            let encoded = golomb_rice_encode(value, BIP158_P);
334            let mut reader = BitReader::new(&encoded);
335            let decoded = golomb_rice_decode(&mut reader, BIP158_P);
336
337            assert_eq!(decoded, Some(value), "Roundtrip failed for value {value}");
338        }
339    }
340
341    #[test]
342    fn test_build_and_match_filter() {
343        use crate::{OutPoint, Transaction, TransactionInput, TransactionOutput};
344
345        // Create a test transaction
346        let tx = Transaction {
347            version: 1,
348            inputs: vec![TransactionInput {
349                prevout: OutPoint {
350                    hash: [0u8; 32],
351                    index: 0,
352                },
353                script_sig: vec![],
354                sequence: 0xffffffff,
355            }]
356            .into(),
357            outputs: vec![TransactionOutput {
358                value: 1000,
359                script_pubkey: vec![blvm_consensus::opcodes::OP_1, blvm_consensus::opcodes::OP_2],
360            }]
361            .into(),
362            lock_time: 0,
363        };
364
365        // Build filter
366        let filter = build_block_filter(&[tx.clone()], &[]).unwrap();
367        assert!(filter.num_elements > 0);
368
369        // Match the script that's in the filter
370        let script_in_filter = &tx.outputs[0].script_pubkey;
371        assert!(match_filter(&filter, script_in_filter));
372
373        // Match a script that's not in the filter
374        let script_not_in_filter = vec![0x53, 0x54]; // OP_3 OP_4
375                                                     // Note: May have false positives due to GCS nature, but should generally work
376        let _matched = match_filter(&filter, &script_not_in_filter);
377        // False positives are possible, so we can't assert false
378        // But we can verify the filter works for scripts that are definitely in it
379    }
380}