Skip to main content

blockchain_zc_parser/
transaction.rs

1//! Bitcoin transaction parsing — zero-copy, no alloc.
2//!
3//! All structures borrow from the original input buffer via lifetime `'a`.
4
5use crate::{
6    cursor::Cursor,
7    error::{ParseError, ParseResult},
8    hash::Hash32,
9    script::Script,
10};
11
/// Maximum number of inputs / outputs accepted per transaction.
///
/// This is a parser sanity limit: counts above it are rejected before any
/// per-element parsing is attempted.
pub const MAX_IO_COUNT: usize = 100_000;
/// Maximum number of witness items accepted per input (parser sanity limit).
pub const MAX_WITNESS_ITEMS: usize = 500;
/// Maximum size, in bytes, of a single witness item.
///
/// 520 bytes is only the limit for stack elements consumed by a P2WSH
/// script. The witnessScript itself may be up to 10,000 bytes (BIP 141), and
/// tapscript leaves have no explicit size limit (BIP 342) — they are bounded
/// only by the 4,000,000 weight-unit block limit, where every witness byte
/// costs one weight unit. Using that bound here keeps the parser from
/// rejecting consensus-valid transactions while still capping the length a
/// malformed varint can claim.
pub const MAX_WITNESS_ITEM_SIZE: usize = 4_000_000;
18
19// ---------------------------------------------------------------------------
20// Outpoint
21// ---------------------------------------------------------------------------
22
23/// Reference to a specific output of a previous transaction.
24#[derive(Debug, Clone, Copy, PartialEq, Eq)]
25pub struct OutPoint<'a> {
26    /// TXID of the referenced transaction (32 bytes, zero-copy).
27    pub txid: Hash32<'a>,
28    /// Index of the output in that transaction.
29    pub vout: u32,
30}
31
32impl<'a> OutPoint<'a> {
33    /// Parse from cursor (36 bytes total).
34    #[inline]
35    pub fn parse(c: &mut Cursor<'a>) -> ParseResult<Self> {
36        let txid = Hash32(c.read_array::<32>()?);
37        let vout = c.read_u32_le()?;
38        Ok(Self { txid, vout })
39    }
40
41    /// `true` for a coinbase input (all-zero txid, vout = 0xffffffff).
42    #[inline]
43    pub fn is_coinbase(&self) -> bool {
44        self.txid.as_bytes().iter().all(|&b| b == 0) && self.vout == 0xffff_ffff
45    }
46}
47
48// ---------------------------------------------------------------------------
49// TxInput
50// ---------------------------------------------------------------------------
51
52/// A transaction input, borrowing from the original buffer.
53#[derive(Debug, Clone, Copy)]
54pub struct TxInput<'a> {
55    /// Previous output being spent.
56    pub previous_output: OutPoint<'a>,
57    /// Unlocking script (scriptSig). Empty for SegWit inputs.
58    pub script_sig: Script<'a>,
59    /// Sequence number.
60    pub sequence: u32,
61}
62
63impl<'a> TxInput<'a> {
64    /// Parse one input from the cursor.
65    #[inline]
66    pub fn parse(c: &mut Cursor<'a>) -> ParseResult<Self> {
67        let previous_output = OutPoint::parse(c)?;
68        let script_sig = Script::parse(c)?;
69        let sequence = c.read_u32_le()?;
70        Ok(Self {
71            previous_output,
72            script_sig,
73            sequence,
74        })
75    }
76
77    /// `true` if this is a coinbase input.
78    #[inline]
79    pub fn is_coinbase(&self) -> bool {
80        self.previous_output.is_coinbase()
81    }
82
83    /// `true` if this input opts into Replace-By-Fee (BIP 125).
84    #[inline]
85    pub fn is_rbf(&self) -> bool {
86        self.sequence <= 0xffff_fffd
87    }
88}
89
90// ---------------------------------------------------------------------------
91// TxOutput
92// ---------------------------------------------------------------------------
93
94/// A transaction output.
95#[derive(Debug, Clone, Copy)]
96pub struct TxOutput<'a> {
97    /// Satoshi value (unsigned).
98    pub value: u64,
99    /// Locking script (scriptPubKey).
100    pub script_pubkey: Script<'a>,
101}
102
103impl<'a> TxOutput<'a> {
104    /// Parse one output from the cursor.
105    #[inline]
106    pub fn parse(c: &mut Cursor<'a>) -> ParseResult<Self> {
107        let value = c.read_u64_le()?;
108        let script_pubkey = Script::parse(c)?;
109        Ok(Self {
110            value,
111            script_pubkey,
112        })
113    }
114}
115
116// ---------------------------------------------------------------------------
117// Witness
118// ---------------------------------------------------------------------------
119
120/// SegWit witness data for a single input — a sequence of byte-string items.
121///
122/// Stores a zero-copy slice covering the entire witness field (item count
123/// varint + all items), so items can be iterated on demand.
124#[derive(Debug, Clone, Copy)]
125pub struct Witness<'a> {
126    /// Raw witness bytes starting at the item-count varint.
127    pub raw: &'a [u8],
128    /// Number of items decoded from the item-count varint.
129    pub item_count: usize,
130}
131
132impl<'a> Witness<'a> {
133    /// Parse one witness from `data` (which should start at the item-count varint).
134    ///
135    /// Returns the parsed [`Witness`] and the number of bytes consumed, so the
136    /// caller can advance its cursor by exactly that amount.
137    pub(crate) fn parse(data: &'a [u8]) -> ParseResult<(Self, usize)> {
138        let mut c = Cursor::new(data);
139        let item_count_u64 = c.read_varint()?;
140        let item_count: usize =
141            item_count_u64
142                .try_into()
143                .map_err(|_| ParseError::IntegerTooLarge {
144                    value: item_count_u64,
145                })?;
146        if item_count > MAX_WITNESS_ITEMS {
147            return Err(ParseError::OversizedData {
148                size: item_count,
149                max: MAX_WITNESS_ITEMS,
150            });
151        }
152        for _ in 0..item_count {
153            c.read_var_bytes(MAX_WITNESS_ITEM_SIZE)?;
154        }
155        let consumed = c.position();
156        // SAFETY: `consumed` bytes were validated by the reads above,
157        // so `data[..consumed]` is a valid sub-slice.
158        let raw = unsafe { data.get_unchecked(..consumed) };
159        Ok((Witness { raw, item_count }, consumed))
160    }
161
162    /// Iterate over items in this witness.
163    pub fn items(&self) -> WitnessIter<'a> {
164        // Skip the item_count varint at the start of `raw`.
165        let mut skip_cursor = Cursor::new(self.raw);
166        let _ = skip_cursor.read_varint(); // can't fail — we validated on parse
167        WitnessIter {
168            cursor: Cursor::new(&self.raw[skip_cursor.position()..]),
169            remaining: self.item_count,
170        }
171    }
172}
173
174/// Iterator over witness items.
175pub struct WitnessIter<'a> {
176    cursor: Cursor<'a>,
177    remaining: usize,
178}
179
180impl<'a> Iterator for WitnessIter<'a> {
181    type Item = ParseResult<&'a [u8]>;
182
183    fn next(&mut self) -> Option<Self::Item> {
184        if self.remaining == 0 {
185            return None;
186        }
187        self.remaining -= 1;
188        Some(self.cursor.read_var_bytes(MAX_WITNESS_ITEM_SIZE))
189    }
190}
191
192// ---------------------------------------------------------------------------
193// Transaction
194// ---------------------------------------------------------------------------
195
196/// A fully parsed Bitcoin transaction.
197///
198/// **Note:** This struct requires the caller to provide backing storage for the
199/// `inputs`, `outputs`, and `witnesses` slices (e.g. stack arrays or arena
200/// buffers). For allocation-free use, prefer [`TransactionParser`] with its
201/// closure API.
202///
203/// All fields borrow from the original parse buffer — zero allocations in the
204/// parser itself.
205#[allow(dead_code)]
206pub struct Transaction<'a> {
207    /// Serialised version (1 or 2).
208    pub version: i32,
209    /// Whether this transaction uses the SegWit format (BIP 141).
210    pub is_segwit: bool,
211    /// Transaction inputs.
212    pub inputs: &'a [TxInput<'a>],
213    /// Transaction outputs.
214    pub outputs: &'a [TxOutput<'a>],
215    /// Witness data, one per input (empty slice for non-segwit).
216    pub witnesses: &'a [Witness<'a>],
217    /// Lock time.
218    pub locktime: u32,
219    /// Zero-copy reference to the raw bytes of this transaction.
220    pub raw: &'a [u8],
221}
222
223impl core::fmt::Debug for Transaction<'_> {
224    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
225        f.debug_struct("Transaction")
226            .field("version", &self.version)
227            .field("is_segwit", &self.is_segwit)
228            .field("input_count", &self.inputs.len())
229            .field("output_count", &self.outputs.len())
230            .field("locktime", &self.locktime)
231            .field("raw_len", &self.raw.len())
232            .finish()
233    }
234}
235
236// ---------------------------------------------------------------------------
237// Streaming parser (no alloc)
238// ---------------------------------------------------------------------------
239
240/// Streaming, callback-based transaction parser for `no_std` / zero-alloc use.
241///
242/// Instead of collecting inputs/outputs into a slice, this parser calls user-
243/// supplied closures for each element, allowing the caller to decide how to
244/// store or discard them.
245///
246/// ```rust
247/// # use blockchain_zc_parser::transaction::TransactionParser;
248/// # let raw = &[0u8; 0]; // placeholder
249/// let mut parser = TransactionParser::new(raw);
250/// // parser.parse_with(|input| { ... }, |output| { ... });
251/// ```
252pub struct TransactionParser<'a> {
253    cursor: Cursor<'a>,
254}
255
256impl<'a> TransactionParser<'a> {
257    /// Create a new parser positioned at the start of a raw transaction.
258    pub fn new(data: &'a [u8]) -> Self {
259        Self {
260            cursor: Cursor::new(data),
261        }
262    }
263
264    /// How many bytes have been consumed so far.
265    ///
266    /// Call this after [`parse_with`](Self::parse_with) to advance an outer
267    /// cursor by exactly the number of bytes this transaction occupied.
268    #[inline]
269    pub fn bytes_consumed(&self) -> usize {
270        self.cursor.position()
271    }
272
273    /// Parse the transaction, calling `on_input` for each input and
274    /// `on_output` for each output.
275    ///
276    /// Witness data is skipped unless you need it (saves work for indexers).
277    ///
278    /// Returns `(version, locktime, input_count, output_count)`.
279    pub fn parse_with<FI, FO>(
280        &mut self,
281        mut on_input: FI,
282        mut on_output: FO,
283    ) -> ParseResult<(i32, u32, usize, usize)>
284    where
285        FI: FnMut(TxInput<'a>) -> ParseResult<()>,
286        FO: FnMut(TxOutput<'a>) -> ParseResult<()>,
287    {
288        let c = &mut self.cursor;
289        let version = c.read_i32_le()?;
290
291        // Local helper for varint parsing from first byte
292        #[inline]
293        fn varint_from_first<'a>(c: &mut Cursor<'a>, first: u8) -> ParseResult<u64> {
294            Ok(match first {
295                0x00..=0xfc => first as u64,
296                0xfd => c.read_u16_le()? as u64,
297                0xfe => c.read_u32_le()? as u64,
298                0xff => c.read_u64_le()?,
299            })
300        }
301
302        // Detect SegWit marker and read input count
303        let first_byte = c.read_u8()?;
304        let is_segwit = if first_byte == 0x00 {
305            // Potential SegWit marker. In legacy format this would mean 0 inputs,
306            // which is invalid; we treat 0x00 0x01 as SegWit, otherwise error.
307            let flag = c.read_u8()?;
308            if flag != 0x01 {
309                return Err(ParseError::InvalidSegwitFlag(flag));
310            }
311            true
312        } else {
313            false
314        };
315
316        let input_count_u64 = if is_segwit {
317            c.read_varint()?
318        } else {
319            // `first_byte` is the first byte of the input-count varint.
320            varint_from_first(c, first_byte)?
321        };
322
323        let input_count: usize =
324            input_count_u64
325                .try_into()
326                .map_err(|_| ParseError::IntegerTooLarge {
327                    value: input_count_u64,
328                })?;
329
330        if input_count == 0 {
331            return Err(ParseError::InvalidInputCount);
332        }
333
334        if input_count > MAX_IO_COUNT {
335            return Err(ParseError::OversizedData {
336                size: input_count,
337                max: MAX_IO_COUNT,
338            });
339        }
340
341        for _ in 0..input_count {
342            let input = TxInput::parse(c)?;
343            on_input(input)?;
344        }
345
346        let output_count_u64 = c.read_varint()?;
347        let output_count: usize =
348            output_count_u64
349                .try_into()
350                .map_err(|_| ParseError::IntegerTooLarge {
351                    value: output_count_u64,
352                })?;
353        if output_count > MAX_IO_COUNT {
354            return Err(ParseError::OversizedData {
355                size: output_count,
356                max: MAX_IO_COUNT,
357            });
358        }
359
360        for _ in 0..output_count {
361            let output = TxOutput::parse(c)?;
362            on_output(output)?;
363        }
364
365        // Parse (and discard) witness data, using Witness::parse so the logic
366        // lives in one place.  Callers that need witness access can use the
367        // returned Witness values by adding an `on_witness` callback in future.
368        if is_segwit {
369            for _ in 0..input_count {
370                let (_, consumed) = Witness::parse(c.as_slice())?;
371                c.skip(consumed)?;
372            }
373        }
374
375        let locktime = c.read_u32_le()?;
376        Ok((version, locktime, input_count, output_count))
377    }
378}
379
#[cfg(test)]
mod tests {
    extern crate std;
    use super::*;
    use std::vec::Vec;

    /// Build the bytes of a minimal, valid, non-segwit coinbase transaction.
    fn coinbase_tx_raw() -> Vec<u8> {
        let mut bytes = Vec::new();
        // version = 1
        bytes.extend_from_slice(&1i32.to_le_bytes());
        // one input
        bytes.push(1);
        // coinbase outpoint: all-zero txid, vout = 0xffffffff
        bytes.extend_from_slice(&[0u8; 32]);
        bytes.extend_from_slice(&u32::MAX.to_le_bytes());
        // scriptSig: length prefix 4, then four arbitrary bytes
        bytes.extend_from_slice(&[4, 0xde, 0xad, 0xbe, 0xef]);
        // sequence
        bytes.extend_from_slice(&u32::MAX.to_le_bytes());
        // one output
        bytes.push(1);
        // value: 50 BTC expressed in satoshis
        bytes.extend_from_slice(&5_000_000_000u64.to_le_bytes());
        // empty scriptPubKey (non-standard, fine for a test)
        bytes.push(0);
        // locktime = 0
        bytes.extend_from_slice(&[0u8; 4]);
        bytes
    }

    /// The streaming parser reports the header fields and calls each
    /// callback exactly once for the one-input, one-output coinbase.
    #[test]
    fn parse_coinbase_streaming() {
        let raw = coinbase_tx_raw();
        let mut parser = TransactionParser::new(&raw);
        let mut seen_inputs = 0usize;
        let mut seen_outputs = 0usize;
        let mut coinbase_seen = false;

        let summary = parser
            .parse_with(
                |input| {
                    seen_inputs += 1;
                    coinbase_seen |= input.is_coinbase();
                    Ok(())
                },
                |_output| {
                    seen_outputs += 1;
                    Ok(())
                },
            )
            .unwrap();

        // (version, locktime, input_count, output_count)
        assert_eq!(summary, (1, 0, 1, 1));
        assert_eq!(seen_inputs, 1);
        assert_eq!(seen_outputs, 1);
        assert!(coinbase_seen);
    }

    /// An all-zero txid combined with vout 0xffffffff is a coinbase outpoint.
    #[test]
    fn outpoint_coinbase_detection() {
        let mut raw = [0u8; 36].to_vec();
        raw[32..].copy_from_slice(&u32::MAX.to_le_bytes());
        let mut cursor = Cursor::new(&raw);
        let outpoint = OutPoint::parse(&mut cursor).unwrap();
        assert!(outpoint.is_coinbase());
    }
}