blockchain_zc_parser/block.rs
1//! Bitcoin block header and full block parsing — zero-copy.
2
3use crate::{
4 cursor::Cursor,
5 error::{ParseError, ParseResult},
6 hash::Hash32,
7 transaction::TransactionParser,
8};
9
/// Bitcoin mainnet magic bytes, in stored byte order (`f9 be b4 d9`).
///
/// Written as the little-endian encoding of `0xD9B4BEF9`.
pub const MAINNET_MAGIC: [u8; 4] = 0xD9B4_BEF9_u32.to_le_bytes();
/// Bitcoin testnet3 magic bytes, in stored byte order (`0b 11 09 07`).
pub const TESTNET_MAGIC: [u8; 4] = 0x0709_110B_u32.to_le_bytes();
/// Bitcoin signet magic bytes, in stored byte order (`0a 03 cf 40`).
pub const SIGNET_MAGIC: [u8; 4] = 0x40CF_030A_u32.to_le_bytes();

/// Upper bound on the transaction count a block may declare.
///
/// Purely a sanity cap so that iteration over a corrupt block terminates early.
pub const MAX_BLOCK_TXN: usize = 1_000_000;

/// Largest raw block payload accepted from a single `.dat` file entry.
///
/// Bitcoin Core limits blocks to 4,000,000 weight units, but raw serialized
/// blocks can still be large; this is a defensive cap applied when parsing
/// file blobs, not a consensus rule.
pub const MAX_BLOCK_BYTES: usize = 8 * 1_000_000;
25
26// ---------------------------------------------------------------------------
27// Block header
28// ---------------------------------------------------------------------------
29
30/// An 80-byte Bitcoin block header — zero-copy.
31///
32/// All hash fields borrow directly from the parse buffer.
33#[derive(Debug, Clone, Copy)]
34pub struct BlockHeader<'a> {
35 /// Protocol version.
36 pub version: i32,
37 /// Hash of the previous block header.
38 pub prev_block: Hash32<'a>,
39 /// Merkle root of all transactions.
40 pub merkle_root: Hash32<'a>,
41 /// Unix timestamp (seconds since epoch).
42 pub timestamp: u32,
43 /// Compact difficulty target.
44 pub bits: u32,
45 /// Nonce.
46 pub nonce: u32,
47 /// Raw 80-byte header (for hashing / comparison).
48 pub raw: &'a [u8; 80],
49}
50
51impl<'a> BlockHeader<'a> {
52 /// Parse exactly 80 bytes as a block header.
53 pub fn parse(c: &mut Cursor<'a>) -> ParseResult<Self> {
54 let raw: &'a [u8; 80] = c.read_array::<80>()?;
55 // Parse fields from a sub-cursor over the same bytes (zero-copy).
56 let mut sub = Cursor::new(raw.as_slice());
57 let version = sub.read_i32_le()?;
58 let prev_block = Hash32(sub.read_array::<32>()?);
59 let merkle_root = Hash32(sub.read_array::<32>()?);
60 let timestamp = sub.read_u32_le()?;
61 let bits = sub.read_u32_le()?;
62 let nonce = sub.read_u32_le()?;
63 Ok(Self {
64 version,
65 prev_block,
66 merkle_root,
67 timestamp,
68 bits,
69 nonce,
70 raw,
71 })
72 }
73
74 /// Compute the block hash (double-SHA-256 of the 80-byte header).
75 ///
76 /// Returns a stack-allocated `[u8; 32]` in Bitcoin display byte order.
77 #[cfg(feature = "std")]
78 pub fn block_hash(&self) -> [u8; 32] {
79 crate::hash::double_sha256(self.raw.as_slice())
80 }
81
82 /// Decode the compact difficulty `bits` to a 256-bit target.
83 ///
84 /// Returns `(mantissa, exponent)` such that `target = mantissa << (8 * (exponent - 3))`.
85 #[inline]
86 pub fn difficulty_target(&self) -> (u32, u8) {
87 let exp = (self.bits >> 24) as u8;
88 let mantissa = self.bits & 0x00ff_ffff;
89 (mantissa, exp)
90 }
91}
92
93// ---------------------------------------------------------------------------
94// Block iterator
95// ---------------------------------------------------------------------------
96
97/// A streaming iterator over the transactions in a raw block.
98///
99/// No allocation is performed. Each call to [`BlockTxIter::next_tx`] advances
100/// the cursor and calls a user-supplied closure.
101pub struct BlockTxIter<'a> {
102 cursor: Cursor<'a>,
103 total: usize,
104 consumed: usize,
105}
106
107impl<'a> BlockTxIter<'a> {
108 /// Position the iterator just after the block header.
109 pub fn new(data: &'a [u8]) -> ParseResult<(BlockHeader<'a>, Self)> {
110 let mut c = Cursor::new(data);
111 let header = BlockHeader::parse(&mut c)?;
112 let tx_count_u64 = c.read_varint()?;
113 let tx_count: usize = tx_count_u64
114 .try_into()
115 .map_err(|_| ParseError::IntegerTooLarge {
116 value: tx_count_u64,
117 })?;
118 if tx_count > MAX_BLOCK_TXN {
119 return Err(ParseError::OversizedData {
120 size: tx_count,
121 max: MAX_BLOCK_TXN,
122 });
123 }
124 Ok((
125 header,
126 Self {
127 cursor: c,
128 total: tx_count,
129 consumed: 0,
130 },
131 ))
132 }
133
134 /// Total number of transactions in this block.
135 #[inline]
136 pub fn total(&self) -> usize {
137 self.total
138 }
139
140 /// Number of transactions already consumed.
141 #[inline]
142 pub fn consumed(&self) -> usize {
143 self.consumed
144 }
145
146 /// Number of bytes remaining in the underlying cursor (unparsed portion of the block payload).
147 #[inline]
148 pub fn bytes_remaining(&self) -> usize {
149 self.cursor.remaining()
150 }
151
152 /// Number of bytes consumed so far from the underlying block payload.
153 ///
154 /// This is useful for instrumentation / debugging: after parsing `n` transactions,
155 /// you can see how many bytes of the block were actually consumed.
156 #[inline]
157 pub fn bytes_consumed(&self) -> usize {
158 self.cursor.position()
159 }
160
161 /// Parse the next transaction and pass it to `f`.
162 ///
163 /// Returns `Ok(true)` if a transaction was parsed, `Ok(false)` at end of block.
164 pub fn next_tx<FI, FO>(&mut self, on_input: FI, on_output: FO) -> ParseResult<bool>
165 where
166 FI: FnMut(crate::transaction::TxInput<'a>) -> ParseResult<()>,
167 FO: FnMut(crate::transaction::TxOutput<'a>) -> ParseResult<()>,
168 {
169 if self.consumed >= self.total {
170 return Ok(false);
171 }
172 // Create a parser over the remaining (unread) buffer.
173 // `as_slice()` returns a `&'a [u8]` zero-copy sub-slice.
174 let remaining: &'a [u8] = self.cursor.as_slice();
175 let mut parser = TransactionParser::new(remaining);
176 parser.parse_with(on_input, on_output)?;
177 // Advance the outer cursor by exactly the bytes the tx consumed.
178 let tx_len = parser.bytes_consumed();
179 self.cursor.skip(tx_len)?;
180 self.consumed += 1;
181 Ok(true)
182 }
183
184 /// Skip all remaining transactions (fast path — just advance the cursor).
185 pub fn skip_remaining(&mut self) -> ParseResult<()> {
186 while self.consumed < self.total {
187 self.next_tx(|_| Ok(()), |_| Ok(()))?;
188 }
189 Ok(())
190 }
191
192 /// In strict mode, ensure we've parsed exactly `total` transactions and consumed the buffer.
193 ///
194 /// Call this after iterating all transactions. If there are unread bytes remaining,
195 /// this returns an error to surface offset / format bugs early.
196 pub fn finish_strict(&self) -> ParseResult<()> {
197 if self.consumed != self.total {
198 return Err(ParseError::IncompleteTransactions {
199 expected: self.total,
200 parsed: self.consumed,
201 });
202 }
203 let rem = self.cursor.remaining();
204 if rem != 0 {
205 return Err(ParseError::TrailingBytes { remaining: rem });
206 }
207 Ok(())
208 }
209
210 /// Convenience helper: iterate all transactions and enforce strict completion.
211 pub fn consume_all_strict<FI, FO>(
212 mut self,
213 mut on_input: FI,
214 mut on_output: FO,
215 ) -> ParseResult<()>
216 where
217 FI: FnMut(crate::transaction::TxInput<'a>) -> ParseResult<()>,
218 FO: FnMut(crate::transaction::TxOutput<'a>) -> ParseResult<()>,
219 {
220 let mut coinbase_txs: usize = 0;
221
222 while self.consumed < self.total {
223 let tx_index = self.consumed;
224 let mut saw_first_input = false;
225 let mut is_coinbase_tx = false;
226
227 // Wrap the user callback to detect coinbase on the first input.
228 let mut wrapped_on_input = |input: crate::transaction::TxInput<'a>| -> ParseResult<()> {
229 if !saw_first_input {
230 saw_first_input = true;
231 // Coinbase is identified by an all-zero prevout txid and vout=0xffff_ffff.
232 // Coinbase if prevout txid is all-zero and vout is 0xffff_ffff.
233 is_coinbase_tx = input.previous_output.is_coinbase();
234 if is_coinbase_tx {
235 coinbase_txs += 1;
236 // In strict mode, coinbase must be the first transaction.
237 if tx_index != 0 {
238 return Err(ParseError::InvalidCoinbaseCount {
239 count: coinbase_txs,
240 });
241 }
242 }
243 }
244 on_input(input)
245 };
246
247 let mut wrapped_on_output =
248 |output: crate::transaction::TxOutput<'a>| -> ParseResult<()> { on_output(output) };
249
250 // Parse exactly one transaction.
251 let _ = self.next_tx(&mut wrapped_on_input, &mut wrapped_on_output)?;
252 }
253
254 // Enforce strict completion.
255 self.finish_strict()?;
256
257 // Enforce exactly one coinbase transaction per block.
258 if coinbase_txs != 1 {
259 return Err(ParseError::InvalidCoinbaseCount {
260 count: coinbase_txs,
261 });
262 }
263 Ok(())
264 }
265}
266
267// ---------------------------------------------------------------------------
268// Raw block file parser (blkNNNNN.dat format)
269// ---------------------------------------------------------------------------
270
271/// Iterator over raw block messages in a Bitcoin Core `blkNNNNN.dat` file.
272///
273/// Each entry in these files is: `[magic: 4] [size: u32-le] [block: size bytes]`.
274pub struct BlkFileIter<'a> {
275 cursor: Cursor<'a>,
276 magic: [u8; 4],
277}
278
279impl<'a> BlkFileIter<'a> {
280 /// Create an iterator over a raw `.dat` file buffer with the given magic.
281 pub fn new(data: &'a [u8], magic: [u8; 4]) -> Self {
282 Self {
283 cursor: Cursor::new(data),
284 magic,
285 }
286 }
287
288 /// Return the next raw block bytes (zero-copy sub-slice), or `None` at EOF.
289 pub fn next_block(&mut self) -> ParseResult<Option<&'a [u8]>> {
290 if self.cursor.is_empty() {
291 return Ok(None);
292 }
293 let magic: &[u8; 4] = self.cursor.read_array::<4>()?;
294 if *magic != self.magic {
295 return Err(ParseError::MagicMismatch {
296 expected: self.magic,
297 got: *magic,
298 });
299 }
300 let size = self.cursor.read_u32_le()? as usize;
301 if size > MAX_BLOCK_BYTES {
302 return Err(ParseError::OversizedData {
303 size,
304 max: MAX_BLOCK_BYTES,
305 });
306 }
307 let block_bytes = self.cursor.read_bytes(size)?;
308 Ok(Some(block_bytes))
309 }
310}
311
#[cfg(test)]
mod tests {
    use super::*;

    /// Raw 80-byte header of the Bitcoin mainnet genesis block.
    fn genesis_header_raw() -> [u8; 80] {
        hex_literal::hex!(
            "01000000" // version
            "0000000000000000000000000000000000000000000000000000000000000000" // prev_block
            "3ba3edfd7a7b12b27ac72c3e67768f617fc81bc3888a51323a9fb8aa4b1e5e4a" // merkle_root
            "29ab5f49" // timestamp
            "ffff001d" // bits
            "1dac2b7c" // nonce
        )
    }

    #[test]
    fn parse_genesis_header() {
        let raw = genesis_header_raw();
        let mut cursor = Cursor::new(&raw);
        let header = BlockHeader::parse(&mut cursor).unwrap();

        // Every little-endian integer field decodes to the expected value.
        assert_eq!(header.version, 1);
        assert_eq!(header.timestamp, 0x495f_ab29);
        assert_eq!(header.bits, 0x1d00_ffff);
        assert_eq!(header.nonce, 0x7c2b_ac1d);

        // Hash fields cover the right byte ranges of the header.
        assert!(header.prev_block.as_bytes().iter().all(|&b| b == 0));
        assert_eq!(&header.merkle_root.as_bytes()[..], &raw[36..68]);

        // The raw view is the complete header and the cursor consumed all of it.
        assert_eq!(header.raw, &raw);
        assert_eq!(cursor.remaining(), 0);
    }

    #[test]
    fn genesis_difficulty_target_parts() {
        let raw = genesis_header_raw();
        let mut cursor = Cursor::new(&raw);
        let header = BlockHeader::parse(&mut cursor).unwrap();
        // bits = 0x1d00ffff -> exponent 0x1d, mantissa 0x00ffff.
        assert_eq!(header.difficulty_target(), (0x00_ffff, 0x1d));
    }

    #[test]
    fn header_is_zero_copy() {
        let raw = genesis_header_raw();
        let mut cursor = Cursor::new(&raw);
        let header = BlockHeader::parse(&mut cursor).unwrap();
        // `prev_block` starts right after the 4-byte version field, so its
        // bytes must live at offset 4 of the original buffer. A sub-slice
        // pointer expresses that without any unsafe pointer arithmetic.
        assert_eq!(header.prev_block.as_bytes().as_ptr(), raw[4..36].as_ptr());
        assert_eq!(header.raw.as_ptr(), raw.as_ptr());
    }

    #[test]
    fn blk_file_iter_yields_entries_then_none() {
        // One entry: mainnet magic, size = 3 (u32 little-endian), 3 payload bytes.
        let file = [0xf9, 0xbe, 0xb4, 0xd9, 3, 0, 0, 0, 1, 2, 3];
        let mut iter = BlkFileIter::new(&file, MAINNET_MAGIC);
        assert_eq!(iter.next_block().unwrap(), Some(&[1u8, 2, 3][..]));
        assert_eq!(iter.next_block().unwrap(), None);
    }

    #[test]
    fn blk_file_iter_rejects_foreign_magic() {
        // Entry written with the testnet magic, read with the mainnet magic.
        let file = [0x0b, 0x11, 0x09, 0x07, 0, 0, 0, 0];
        let mut iter = BlkFileIter::new(&file, MAINNET_MAGIC);
        assert!(matches!(
            iter.next_block(),
            Err(ParseError::MagicMismatch { .. })
        ));
    }
}
352}