Skip to main content

blockchain_zc_parser/
block.rs

1//! Bitcoin block header and full block parsing — zero-copy.
2
3use crate::{
4    cursor::Cursor,
5    error::{ParseError, ParseResult},
6    hash::Hash32,
7    transaction::TransactionParser,
8};
9
/// Network magic of Bitcoin **mainnet**.
///
/// Serialized as the bytes `f9 be b4 d9`, i.e. `0xd9b4bef9` in little-endian order.
pub const MAINNET_MAGIC: [u8; 4] = 0xd9b4_bef9_u32.to_le_bytes();
/// Network magic of Bitcoin **testnet3**.
///
/// Serialized as the bytes `0b 11 09 07`, i.e. `0x0709110b` in little-endian order.
pub const TESTNET_MAGIC: [u8; 4] = 0x0709_110b_u32.to_le_bytes();
/// Network magic of Bitcoin **signet**.
///
/// Serialized as the bytes `0a 03 cf 40`, i.e. `0x40cf030a` in little-endian order.
pub const SIGNET_MAGIC: [u8; 4] = 0x40cf_030a_u32.to_le_bytes();

/// Sanity cap on the transaction count a block may declare.
///
/// [`BlockTxIter::new`] rejects any block whose declared count exceeds this value.
pub const MAX_BLOCK_TXN: usize = 1_000_000;

/// Sanity cap (in bytes) on a single block payload read from a `.dat` file entry.
///
/// Bitcoin Core limits blocks to 4,000,000 weight units, but the raw serialized
/// form can still be large; this is a deliberately generous defensive bound used
/// when slicing block blobs out of a file buffer.
pub const MAX_BLOCK_BYTES: usize = 8 * 1_000_000;
25
26// ---------------------------------------------------------------------------
27// Block header
28// ---------------------------------------------------------------------------
29
30/// An 80-byte Bitcoin block header — zero-copy.
31///
32/// All hash fields borrow directly from the parse buffer.
33#[derive(Debug, Clone, Copy)]
34pub struct BlockHeader<'a> {
35    /// Protocol version.
36    pub version: i32,
37    /// Hash of the previous block header.
38    pub prev_block: Hash32<'a>,
39    /// Merkle root of all transactions.
40    pub merkle_root: Hash32<'a>,
41    /// Unix timestamp (seconds since epoch).
42    pub timestamp: u32,
43    /// Compact difficulty target.
44    pub bits: u32,
45    /// Nonce.
46    pub nonce: u32,
47    /// Raw 80-byte header (for hashing / comparison).
48    pub raw: &'a [u8; 80],
49}
50
51impl<'a> BlockHeader<'a> {
52    /// Parse exactly 80 bytes as a block header.
53    pub fn parse(c: &mut Cursor<'a>) -> ParseResult<Self> {
54        let raw: &'a [u8; 80] = c.read_array::<80>()?;
55        // Parse fields from a sub-cursor over the same bytes (zero-copy).
56        let mut sub = Cursor::new(raw.as_slice());
57        let version = sub.read_i32_le()?;
58        let prev_block = Hash32(sub.read_array::<32>()?);
59        let merkle_root = Hash32(sub.read_array::<32>()?);
60        let timestamp = sub.read_u32_le()?;
61        let bits = sub.read_u32_le()?;
62        let nonce = sub.read_u32_le()?;
63        Ok(Self {
64            version,
65            prev_block,
66            merkle_root,
67            timestamp,
68            bits,
69            nonce,
70            raw,
71        })
72    }
73
74    /// Compute the block hash (double-SHA-256 of the 80-byte header).
75    ///
76    /// Returns a stack-allocated `[u8; 32]` in Bitcoin display byte order.
77    #[cfg(feature = "std")]
78    pub fn block_hash(&self) -> [u8; 32] {
79        crate::hash::double_sha256(self.raw.as_slice())
80    }
81
82    /// Decode the compact difficulty `bits` to a 256-bit target.
83    ///
84    /// Returns `(mantissa, exponent)` such that `target = mantissa << (8 * (exponent - 3))`.
85    #[inline]
86    pub fn difficulty_target(&self) -> (u32, u8) {
87        let exp = (self.bits >> 24) as u8;
88        let mantissa = self.bits & 0x00ff_ffff;
89        (mantissa, exp)
90    }
91}
92
93// ---------------------------------------------------------------------------
94// Block iterator
95// ---------------------------------------------------------------------------
96
97/// A streaming iterator over the transactions in a raw block.
98///
99/// No allocation is performed. Each call to [`BlockTxIter::next_tx`] advances
100/// the cursor and calls a user-supplied closure.
101pub struct BlockTxIter<'a> {
102    cursor: Cursor<'a>,
103    total: usize,
104    consumed: usize,
105}
106
107impl<'a> BlockTxIter<'a> {
108    /// Position the iterator just after the block header.
109    pub fn new(data: &'a [u8]) -> ParseResult<(BlockHeader<'a>, Self)> {
110        let mut c = Cursor::new(data);
111        let header = BlockHeader::parse(&mut c)?;
112        let tx_count_u64 = c.read_varint()?;
113        let tx_count: usize = tx_count_u64
114            .try_into()
115            .map_err(|_| ParseError::IntegerTooLarge {
116                value: tx_count_u64,
117            })?;
118        if tx_count > MAX_BLOCK_TXN {
119            return Err(ParseError::OversizedData {
120                size: tx_count,
121                max: MAX_BLOCK_TXN,
122            });
123        }
124        Ok((
125            header,
126            Self {
127                cursor: c,
128                total: tx_count,
129                consumed: 0,
130            },
131        ))
132    }
133
134    /// Total number of transactions in this block.
135    #[inline]
136    pub fn total(&self) -> usize {
137        self.total
138    }
139
140    /// Number of transactions already consumed.
141    #[inline]
142    pub fn consumed(&self) -> usize {
143        self.consumed
144    }
145
146    /// Number of bytes remaining in the underlying cursor (unparsed portion of the block payload).
147    #[inline]
148    pub fn bytes_remaining(&self) -> usize {
149        self.cursor.remaining()
150    }
151
152    /// Number of bytes consumed so far from the underlying block payload.
153    ///
154    /// This is useful for instrumentation / debugging: after parsing `n` transactions,
155    /// you can see how many bytes of the block were actually consumed.
156    #[inline]
157    pub fn bytes_consumed(&self) -> usize {
158        self.cursor.position()
159    }
160
161    /// Parse the next transaction and pass it to `f`.
162    ///
163    /// Returns `Ok(true)` if a transaction was parsed, `Ok(false)` at end of block.
164    pub fn next_tx<FI, FO>(&mut self, on_input: FI, on_output: FO) -> ParseResult<bool>
165    where
166        FI: FnMut(crate::transaction::TxInput<'a>) -> ParseResult<()>,
167        FO: FnMut(crate::transaction::TxOutput<'a>) -> ParseResult<()>,
168    {
169        if self.consumed >= self.total {
170            return Ok(false);
171        }
172        // Create a parser over the remaining (unread) buffer.
173        // `as_slice()` returns a `&'a [u8]` zero-copy sub-slice.
174        let remaining: &'a [u8] = self.cursor.as_slice();
175        let mut parser = TransactionParser::new(remaining);
176        parser.parse_with(on_input, on_output)?;
177        // Advance the outer cursor by exactly the bytes the tx consumed.
178        let tx_len = parser.bytes_consumed();
179        self.cursor.skip(tx_len)?;
180        self.consumed += 1;
181        Ok(true)
182    }
183
184    /// Skip all remaining transactions (fast path — just advance the cursor).
185    pub fn skip_remaining(&mut self) -> ParseResult<()> {
186        while self.consumed < self.total {
187            self.next_tx(|_| Ok(()), |_| Ok(()))?;
188        }
189        Ok(())
190    }
191
192    /// In strict mode, ensure we've parsed exactly `total` transactions and consumed the buffer.
193    ///
194    /// Call this after iterating all transactions. If there are unread bytes remaining,
195    /// this returns an error to surface offset / format bugs early.
196    pub fn finish_strict(&self) -> ParseResult<()> {
197        if self.consumed != self.total {
198            return Err(ParseError::IncompleteTransactions {
199                expected: self.total,
200                parsed: self.consumed,
201            });
202        }
203        let rem = self.cursor.remaining();
204        if rem != 0 {
205            return Err(ParseError::TrailingBytes { remaining: rem });
206        }
207        Ok(())
208    }
209
210    /// Convenience helper: iterate all transactions and enforce strict completion.
211    pub fn consume_all_strict<FI, FO>(
212        mut self,
213        mut on_input: FI,
214        mut on_output: FO,
215    ) -> ParseResult<()>
216    where
217        FI: FnMut(crate::transaction::TxInput<'a>) -> ParseResult<()>,
218        FO: FnMut(crate::transaction::TxOutput<'a>) -> ParseResult<()>,
219    {
220        let mut coinbase_txs: usize = 0;
221
222        while self.consumed < self.total {
223            let tx_index = self.consumed;
224            let mut saw_first_input = false;
225            let mut is_coinbase_tx = false;
226
227            // Wrap the user callback to detect coinbase on the first input.
228            let mut wrapped_on_input = |input: crate::transaction::TxInput<'a>| -> ParseResult<()> {
229                if !saw_first_input {
230                    saw_first_input = true;
231                    // Coinbase is identified by an all-zero prevout txid and vout=0xffff_ffff.
232                    // Coinbase if prevout txid is all-zero and vout is 0xffff_ffff.
233                    is_coinbase_tx = input.previous_output.is_coinbase();
234                    if is_coinbase_tx {
235                        coinbase_txs += 1;
236                        // In strict mode, coinbase must be the first transaction.
237                        if tx_index != 0 {
238                            return Err(ParseError::InvalidCoinbaseCount {
239                                count: coinbase_txs,
240                            });
241                        }
242                    }
243                }
244                on_input(input)
245            };
246
247            let mut wrapped_on_output =
248                |output: crate::transaction::TxOutput<'a>| -> ParseResult<()> { on_output(output) };
249
250            // Parse exactly one transaction.
251            let _ = self.next_tx(&mut wrapped_on_input, &mut wrapped_on_output)?;
252        }
253
254        // Enforce strict completion.
255        self.finish_strict()?;
256
257        // Enforce exactly one coinbase transaction per block.
258        if coinbase_txs != 1 {
259            return Err(ParseError::InvalidCoinbaseCount {
260                count: coinbase_txs,
261            });
262        }
263        Ok(())
264    }
265}
266
267// ---------------------------------------------------------------------------
268// Raw block file parser (blkNNNNN.dat format)
269// ---------------------------------------------------------------------------
270
271/// Iterator over raw block messages in a Bitcoin Core `blkNNNNN.dat` file.
272///
273/// Each entry in these files is: `[magic: 4] [size: u32-le] [block: size bytes]`.
274pub struct BlkFileIter<'a> {
275    cursor: Cursor<'a>,
276    magic: [u8; 4],
277}
278
279impl<'a> BlkFileIter<'a> {
280    /// Create an iterator over a raw `.dat` file buffer with the given magic.
281    pub fn new(data: &'a [u8], magic: [u8; 4]) -> Self {
282        Self {
283            cursor: Cursor::new(data),
284            magic,
285        }
286    }
287
288    /// Return the next raw block bytes (zero-copy sub-slice), or `None` at EOF.
289    pub fn next_block(&mut self) -> ParseResult<Option<&'a [u8]>> {
290        if self.cursor.is_empty() {
291            return Ok(None);
292        }
293        let magic: &[u8; 4] = self.cursor.read_array::<4>()?;
294        if *magic != self.magic {
295            return Err(ParseError::MagicMismatch {
296                expected: self.magic,
297                got: *magic,
298            });
299        }
300        let size = self.cursor.read_u32_le()? as usize;
301        if size > MAX_BLOCK_BYTES {
302            return Err(ParseError::OversizedData {
303                size,
304                max: MAX_BLOCK_BYTES,
305            });
306        }
307        let block_bytes = self.cursor.read_bytes(size)?;
308        Ok(Some(block_bytes))
309    }
310}
311
#[cfg(test)]
mod tests {
    use super::*;

    /// Serialized header of the Bitcoin mainnet genesis block.
    fn genesis_header_raw() -> [u8; 80] {
        hex_literal::hex!(
            "01000000"                                                         // version
            "0000000000000000000000000000000000000000000000000000000000000000" // prev_block
            "3ba3edfd7a7b12b27ac72c3e67768f617fc81bc3888a51323a9fb8aa4b1e5e4a" // merkle_root
            "29ab5f49"                                                         // timestamp
            "ffff001d"                                                         // bits
            "1dac2b7c"                                                         // nonce
        )
    }

    /// The decoded fields of the genesis header match the known values.
    #[test]
    fn parse_genesis_header() {
        let bytes = genesis_header_raw();
        let header = BlockHeader::parse(&mut Cursor::new(&bytes)).unwrap();

        assert_eq!(header.version, 1);
        assert_eq!(header.nonce, 0x7c2bac1d);
        assert!(header.prev_block.as_bytes().iter().all(|byte| *byte == 0));
    }

    /// Hash fields must borrow from the input buffer rather than copy it.
    #[test]
    fn header_is_zero_copy() {
        let bytes = genesis_header_raw();
        let mut cursor = Cursor::new(&bytes);
        let header = BlockHeader::parse(&mut cursor).unwrap();

        // `prev_block` follows the 4-byte version field, so its pointer must be
        // the address of byte 4 of the original buffer.
        let expected = bytes[4..].as_ptr();
        assert_eq!(header.prev_block.as_bytes().as_ptr(), expected);
    }
}
352}