blockchain_zc_parser/transaction.rs
1//! Bitcoin transaction parsing — zero-copy, no alloc.
2//!
3//! All structures borrow from the original input buffer via lifetime `'a`.
4
5use crate::{
6 cursor::Cursor,
7 error::{ParseError, ParseResult},
8 hash::Hash32,
9 script::Script,
10};
11
/// Maximum number of inputs / outputs per transaction (parser sanity limit).
pub const MAX_IO_COUNT: usize = 100_000;
/// Maximum witness items per input.
///
/// NOTE(review): tapscript permits up to 1000 initial stack elements plus the
/// script and control block, so 500 may still reject some consensus-valid
/// witnesses — confirm before relying on this for full-chain parsing.
pub const MAX_WITNESS_ITEMS: usize = 500;
/// Maximum size in bytes of a single serialized witness item.
///
/// This is a parser sanity bound, not the 520-byte script stack-element limit:
/// a P2WSH witness script (up to 10,000 bytes) or a tapscript leaf is carried
/// as one witness item and routinely exceeds 520 bytes. No item can be larger
/// than a whole block, so the block weight limit (4,000,000) is used as the
/// ceiling.
pub const MAX_WITNESS_ITEM_SIZE: usize = 4_000_000;
18
19// ---------------------------------------------------------------------------
20// Outpoint
21// ---------------------------------------------------------------------------
22
/// Reference to a specific output of a previous transaction.
///
/// Serialized as 36 bytes: a 32-byte txid followed by a little-endian `u32`
/// output index (see [`OutPoint::parse`]).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct OutPoint<'a> {
    /// TXID of the referenced transaction (32 bytes, zero-copy).
    pub txid: Hash32<'a>,
    /// Index of the output in that transaction.
    pub vout: u32,
}
31
32impl<'a> OutPoint<'a> {
33 /// Parse from cursor (36 bytes total).
34 #[inline]
35 pub fn parse(c: &mut Cursor<'a>) -> ParseResult<Self> {
36 let txid = Hash32(c.read_array::<32>()?);
37 let vout = c.read_u32_le()?;
38 Ok(Self { txid, vout })
39 }
40
41 /// `true` for a coinbase input (all-zero txid, vout = 0xffffffff).
42 #[inline]
43 pub fn is_coinbase(&self) -> bool {
44 self.txid.as_bytes().iter().all(|&b| b == 0) && self.vout == 0xffff_ffff
45 }
46}
47
48// ---------------------------------------------------------------------------
49// TxInput
50// ---------------------------------------------------------------------------
51
/// A transaction input, borrowing from the original buffer.
///
/// Wire order, as read by [`TxInput::parse`]: outpoint, scriptSig, then a
/// little-endian `u32` sequence number.
#[derive(Debug, Clone, Copy)]
pub struct TxInput<'a> {
    /// Previous output being spent.
    pub previous_output: OutPoint<'a>,
    /// Unlocking script (scriptSig). Empty for native SegWit inputs.
    pub script_sig: Script<'a>,
    /// Sequence number; [`TxInput::is_rbf`] inspects this value.
    pub sequence: u32,
}
62
63impl<'a> TxInput<'a> {
64 /// Parse one input from the cursor.
65 #[inline]
66 pub fn parse(c: &mut Cursor<'a>) -> ParseResult<Self> {
67 let previous_output = OutPoint::parse(c)?;
68 let script_sig = Script::parse(c)?;
69 let sequence = c.read_u32_le()?;
70 Ok(Self {
71 previous_output,
72 script_sig,
73 sequence,
74 })
75 }
76
77 /// `true` if this is a coinbase input.
78 #[inline]
79 pub fn is_coinbase(&self) -> bool {
80 self.previous_output.is_coinbase()
81 }
82
83 /// `true` if this input opts into Replace-By-Fee (BIP 125).
84 #[inline]
85 pub fn is_rbf(&self) -> bool {
86 self.sequence <= 0xffff_fffd
87 }
88}
89
90// ---------------------------------------------------------------------------
91// TxOutput
92// ---------------------------------------------------------------------------
93
/// A transaction output.
///
/// Wire order, as read by [`TxOutput::parse`]: a little-endian `u64` value
/// followed by the locking script.
#[derive(Debug, Clone, Copy)]
pub struct TxOutput<'a> {
    /// Satoshi value (unsigned).
    pub value: u64,
    /// Locking script (scriptPubKey).
    pub script_pubkey: Script<'a>,
}
102
103impl<'a> TxOutput<'a> {
104 /// Parse one output from the cursor.
105 #[inline]
106 pub fn parse(c: &mut Cursor<'a>) -> ParseResult<Self> {
107 let value = c.read_u64_le()?;
108 let script_pubkey = Script::parse(c)?;
109 Ok(Self {
110 value,
111 script_pubkey,
112 })
113 }
114}
115
116// ---------------------------------------------------------------------------
117// Witness
118// ---------------------------------------------------------------------------
119
/// SegWit witness data for a single input — a sequence of byte-string items.
///
/// Stores a zero-copy slice covering the entire witness field (item count
/// varint + all items), so items can be iterated on demand via
/// [`Witness::items`].
///
/// Both fields are public; `Witness::parse` is what guarantees that `raw` and
/// `item_count` agree with each other.
#[derive(Debug, Clone, Copy)]
pub struct Witness<'a> {
    /// Raw witness bytes starting at the item-count varint.
    pub raw: &'a [u8],
    /// Number of items decoded from the item-count varint.
    pub item_count: usize,
}
131
132impl<'a> Witness<'a> {
133 /// Parse one witness from `data` (which should start at the item-count varint).
134 ///
135 /// Returns the parsed [`Witness`] and the number of bytes consumed, so the
136 /// caller can advance its cursor by exactly that amount.
137 pub(crate) fn parse(data: &'a [u8]) -> ParseResult<(Self, usize)> {
138 let mut c = Cursor::new(data);
139 let item_count_u64 = c.read_varint()?;
140 let item_count: usize =
141 item_count_u64
142 .try_into()
143 .map_err(|_| ParseError::IntegerTooLarge {
144 value: item_count_u64,
145 })?;
146 if item_count > MAX_WITNESS_ITEMS {
147 return Err(ParseError::OversizedData {
148 size: item_count,
149 max: MAX_WITNESS_ITEMS,
150 });
151 }
152 for _ in 0..item_count {
153 c.read_var_bytes(MAX_WITNESS_ITEM_SIZE)?;
154 }
155 let consumed = c.position();
156 // SAFETY: `consumed` bytes were validated by the reads above,
157 // so `data[..consumed]` is a valid sub-slice.
158 let raw = unsafe { data.get_unchecked(..consumed) };
159 Ok((Witness { raw, item_count }, consumed))
160 }
161
162 /// Iterate over items in this witness.
163 pub fn items(&self) -> WitnessIter<'a> {
164 // Skip the item_count varint at the start of `raw`.
165 let mut skip_cursor = Cursor::new(self.raw);
166 let _ = skip_cursor.read_varint(); // can't fail — we validated on parse
167 WitnessIter {
168 cursor: Cursor::new(&self.raw[skip_cursor.position()..]),
169 remaining: self.item_count,
170 }
171 }
172}
173
/// Iterator over witness items.
///
/// Created by [`Witness::items`]. Yields one `ParseResult<&[u8]>` per item
/// until `remaining` reaches zero.
pub struct WitnessIter<'a> {
    /// Cursor over the witness bytes that follow the item-count varint.
    cursor: Cursor<'a>,
    /// Number of items still to be yielded.
    remaining: usize,
}
179
180impl<'a> Iterator for WitnessIter<'a> {
181 type Item = ParseResult<&'a [u8]>;
182
183 fn next(&mut self) -> Option<Self::Item> {
184 if self.remaining == 0 {
185 return None;
186 }
187 self.remaining -= 1;
188 Some(self.cursor.read_var_bytes(MAX_WITNESS_ITEM_SIZE))
189 }
190}
191
192// ---------------------------------------------------------------------------
193// Transaction
194// ---------------------------------------------------------------------------
195
/// A fully parsed Bitcoin transaction.
///
/// **Note:** This struct requires the caller to provide backing storage for the
/// `inputs`, `outputs`, and `witnesses` slices (e.g. stack arrays or arena
/// buffers). For allocation-free use, prefer [`TransactionParser`] with its
/// closure API.
///
/// All fields borrow from the original parse buffer — zero allocations in the
/// parser itself.
///
/// The hand-written `Debug` impl prints element counts and the raw length
/// rather than the full contents.
// NOTE(review): nothing in this file constructs `Transaction`; the `allow`
// below presumably silences the resulting warning — confirm it is still needed.
#[allow(dead_code)]
pub struct Transaction<'a> {
    /// Serialised version (1 or 2).
    pub version: i32,
    /// Whether this transaction uses the SegWit format (BIP 141).
    pub is_segwit: bool,
    /// Transaction inputs.
    pub inputs: &'a [TxInput<'a>],
    /// Transaction outputs.
    pub outputs: &'a [TxOutput<'a>],
    /// Witness data, one per input (empty slice for non-segwit).
    pub witnesses: &'a [Witness<'a>],
    /// Lock time.
    pub locktime: u32,
    /// Zero-copy reference to the raw bytes of this transaction.
    pub raw: &'a [u8],
}
222
223impl core::fmt::Debug for Transaction<'_> {
224 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
225 f.debug_struct("Transaction")
226 .field("version", &self.version)
227 .field("is_segwit", &self.is_segwit)
228 .field("input_count", &self.inputs.len())
229 .field("output_count", &self.outputs.len())
230 .field("locktime", &self.locktime)
231 .field("raw_len", &self.raw.len())
232 .finish()
233 }
234}
235
236// ---------------------------------------------------------------------------
237// Streaming parser (no alloc)
238// ---------------------------------------------------------------------------
239
/// Streaming, callback-based transaction parser for `no_std` / zero-alloc use.
///
/// Instead of collecting inputs/outputs into a slice, this parser calls user-
/// supplied closures for each element, allowing the caller to decide how to
/// store or discard them.
///
/// The parser owns a cursor over the input bytes; `bytes_consumed` reports how
/// far that cursor has advanced.
///
/// ```rust
/// # use blockchain_zc_parser::transaction::TransactionParser;
/// # let raw = &[0u8; 0]; // placeholder
/// let mut parser = TransactionParser::new(raw);
/// // parser.parse_with(|input| { ... }, |output| { ... });
/// ```
pub struct TransactionParser<'a> {
    /// Read position within the raw transaction bytes.
    cursor: Cursor<'a>,
}
255
256impl<'a> TransactionParser<'a> {
257 /// Create a new parser positioned at the start of a raw transaction.
258 pub fn new(data: &'a [u8]) -> Self {
259 Self {
260 cursor: Cursor::new(data),
261 }
262 }
263
264 /// How many bytes have been consumed so far.
265 ///
266 /// Call this after [`parse_with`](Self::parse_with) to advance an outer
267 /// cursor by exactly the number of bytes this transaction occupied.
268 #[inline]
269 pub fn bytes_consumed(&self) -> usize {
270 self.cursor.position()
271 }
272
273 /// Parse the transaction, calling `on_input` for each input and
274 /// `on_output` for each output.
275 ///
276 /// Witness data is skipped unless you need it (saves work for indexers).
277 ///
278 /// Returns `(version, locktime, input_count, output_count)`.
279 pub fn parse_with<FI, FO>(
280 &mut self,
281 mut on_input: FI,
282 mut on_output: FO,
283 ) -> ParseResult<(i32, u32, usize, usize)>
284 where
285 FI: FnMut(TxInput<'a>) -> ParseResult<()>,
286 FO: FnMut(TxOutput<'a>) -> ParseResult<()>,
287 {
288 let c = &mut self.cursor;
289 let version = c.read_i32_le()?;
290
291 // Local helper for varint parsing from first byte
292 #[inline]
293 fn varint_from_first<'a>(c: &mut Cursor<'a>, first: u8) -> ParseResult<u64> {
294 Ok(match first {
295 0x00..=0xfc => first as u64,
296 0xfd => c.read_u16_le()? as u64,
297 0xfe => c.read_u32_le()? as u64,
298 0xff => c.read_u64_le()?,
299 })
300 }
301
302 // Detect SegWit marker and read input count
303 let first_byte = c.read_u8()?;
304 let is_segwit = if first_byte == 0x00 {
305 // Potential SegWit marker. In legacy format this would mean 0 inputs,
306 // which is invalid; we treat 0x00 0x01 as SegWit, otherwise error.
307 let flag = c.read_u8()?;
308 if flag != 0x01 {
309 return Err(ParseError::InvalidSegwitFlag(flag));
310 }
311 true
312 } else {
313 false
314 };
315
316 let input_count_u64 = if is_segwit {
317 c.read_varint()?
318 } else {
319 // `first_byte` is the first byte of the input-count varint.
320 varint_from_first(c, first_byte)?
321 };
322
323 let input_count: usize =
324 input_count_u64
325 .try_into()
326 .map_err(|_| ParseError::IntegerTooLarge {
327 value: input_count_u64,
328 })?;
329
330 if input_count == 0 {
331 return Err(ParseError::InvalidInputCount);
332 }
333
334 if input_count > MAX_IO_COUNT {
335 return Err(ParseError::OversizedData {
336 size: input_count,
337 max: MAX_IO_COUNT,
338 });
339 }
340
341 for _ in 0..input_count {
342 let input = TxInput::parse(c)?;
343 on_input(input)?;
344 }
345
346 let output_count_u64 = c.read_varint()?;
347 let output_count: usize =
348 output_count_u64
349 .try_into()
350 .map_err(|_| ParseError::IntegerTooLarge {
351 value: output_count_u64,
352 })?;
353 if output_count > MAX_IO_COUNT {
354 return Err(ParseError::OversizedData {
355 size: output_count,
356 max: MAX_IO_COUNT,
357 });
358 }
359
360 for _ in 0..output_count {
361 let output = TxOutput::parse(c)?;
362 on_output(output)?;
363 }
364
365 // Parse (and discard) witness data, using Witness::parse so the logic
366 // lives in one place. Callers that need witness access can use the
367 // returned Witness values by adding an `on_witness` callback in future.
368 if is_segwit {
369 for _ in 0..input_count {
370 let (_, consumed) = Witness::parse(c.as_slice())?;
371 c.skip(consumed)?;
372 }
373 }
374
375 let locktime = c.read_u32_le()?;
376 Ok((version, locktime, input_count, output_count))
377 }
378}
379
#[cfg(test)]
mod tests {
    extern crate std;
    use super::*;
    use std::vec::Vec;

    /// A minimal legacy (non-SegWit) coinbase transaction: one input spending
    /// the null outpoint, one 50 BTC output with an empty scriptPubKey.
    fn coinbase_tx_raw() -> Vec<u8> {
        let mut bytes = Vec::new();
        bytes.extend_from_slice(&1i32.to_le_bytes()); // version
        bytes.push(1); // input count
        bytes.extend_from_slice(&[0u8; 32]); // outpoint txid: all zero
        bytes.extend_from_slice(&u32::MAX.to_le_bytes()); // outpoint vout
        bytes.push(4); // scriptSig length
        bytes.extend_from_slice(&[0xde, 0xad, 0xbe, 0xef]); // scriptSig bytes
        bytes.extend_from_slice(&u32::MAX.to_le_bytes()); // sequence
        bytes.push(1); // output count
        bytes.extend_from_slice(&5_000_000_000u64.to_le_bytes()); // 50 BTC in satoshis
        bytes.push(0); // empty scriptPubKey (non-standard, fine for a test)
        bytes.extend_from_slice(&0u32.to_le_bytes()); // locktime
        bytes
    }

    /// The streaming parser reports the header fields and invokes each
    /// callback once for the single input and single output.
    #[test]
    fn parse_coinbase_streaming() {
        let raw = coinbase_tx_raw();
        let mut seen_inputs = 0usize;
        let mut seen_outputs = 0usize;
        let mut saw_coinbase = false;

        let mut parser = TransactionParser::new(&raw);
        let summary = parser
            .parse_with(
                |inp| {
                    seen_inputs += 1;
                    saw_coinbase |= inp.is_coinbase();
                    Ok(())
                },
                |_out| {
                    seen_outputs += 1;
                    Ok(())
                },
            )
            .unwrap();

        // (version, locktime, input_count, output_count)
        assert_eq!(summary, (1, 0, 1, 1));
        assert_eq!(seen_inputs, 1);
        assert_eq!(seen_outputs, 1);
        assert!(saw_coinbase);
    }

    /// An all-zero txid with `vout == 0xffffffff` is recognised as coinbase.
    #[test]
    fn outpoint_coinbase_detection() {
        let mut raw = [0u8; 36].to_vec();
        raw[32..].copy_from_slice(&u32::MAX.to_le_bytes());
        let mut c = Cursor::new(&raw);
        assert!(OutPoint::parse(&mut c).unwrap().is_coinbase());
    }
}
454}