Skip to main content

blvm_primitives/serialization/
transaction.rs

//! Transaction wire-format serialization and deserialization.
//!
//! Implements the Bitcoin transaction wire format.
//! The encoding must match Bitcoin protocol serialization exactly for consensus compatibility.
5
6use super::varint::{decode_varint, encode_varint};
7use crate::error::{ConsensusError, Result};
8use crate::types::*;
9use std::borrow::Cow;
10
11#[cfg(feature = "production")]
12use smallvec::SmallVec;
13
/// Reasons a byte slice can fail to parse as a wire-format transaction.
///
/// The [`std::fmt::Display`] text of each variant is the human-readable message for that failure.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TransactionParseError {
    /// The input ended before a complete field could be read.
    InsufficientBytes,
    /// The version field is invalid.
    InvalidVersion,
    /// The declared input count is not acceptable.
    InvalidInputCount,
    /// The declared output count is not acceptable.
    InvalidOutputCount,
    /// A declared script length is not acceptable.
    InvalidScriptLength,
    /// The lock-time field is invalid.
    InvalidLockTime,
}

impl std::fmt::Display for TransactionParseError {
    /// Write the fixed, human-readable message associated with the variant.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let message = match self {
            Self::InsufficientBytes => "Insufficient bytes to parse transaction",
            Self::InvalidVersion => "Invalid transaction version",
            Self::InvalidInputCount => "Invalid input count",
            Self::InvalidOutputCount => "Invalid output count",
            Self::InvalidScriptLength => "Invalid script length",
            Self::InvalidLockTime => "Invalid lock time",
        };
        f.write_str(message)
    }
}

impl std::error::Error for TransactionParseError {}
41
42#[inline]
43fn checked_slice_end(offset: usize, len: u64) -> Result<usize> {
44    let len = usize::try_from(len).map_err(|_| {
45        ConsensusError::Serialization(Cow::Owned(
46            TransactionParseError::InvalidScriptLength.to_string(),
47        ))
48    })?;
49    offset.checked_add(len).ok_or_else(|| {
50        ConsensusError::Serialization(Cow::Owned(
51            TransactionParseError::InsufficientBytes.to_string(),
52        ))
53    })
54}
55
56/// After the 4-byte `version`, read the compact input count and optional segwit wrapper.
57///
58/// Matches [`UnserializeTransaction`] in Bitcoin Core: the first varint is always the input count.
59/// If it is zero, the next byte is an optional-features flag. `1` means BIP141 extended encoding
60/// (marker was absorbed into the empty `vin` vector); the real input-count varint follows. Flag `0`
61/// means no extension: `vin` and `vout` both stay empty and **no** output-count varint appears on
62/// the wire before `lock_time`.
63///
64/// **Do not** detect segwit by peeking `0x00 0x01` before decoding that first varint: the compact
65/// encoding of the input count is not always a single `0x00` byte, so peeking mis-aligns the stream.
66fn read_tx_input_count_after_version(
67    data: &[u8],
68    mut offset: usize,
69) -> Result<(bool, u64, usize, bool)> {
70    let (mut input_count, varint_len) = decode_varint(&data[offset..])?;
71    offset += varint_len;
72
73    if input_count > 1_000_000 {
74        return Err(ConsensusError::Serialization(Cow::Owned(
75            TransactionParseError::InvalidInputCount.to_string(),
76        )));
77    }
78
79    let mut is_segwit = false;
80    // When true, Bitcoin Core left vout empty without reading a vector length (flag byte was 0).
81    let mut implicit_empty_outputs = false;
82
83    if input_count == 0 {
84        if offset >= data.len() {
85            return Err(ConsensusError::Serialization(Cow::Owned(
86                TransactionParseError::InsufficientBytes.to_string(),
87            )));
88        }
89        let flag = data[offset];
90        offset += 1;
91
92        if flag == 0 {
93            implicit_empty_outputs = true;
94            return Ok((false, 0, offset, implicit_empty_outputs));
95        }
96
97        if flag != 1 {
98            return Err(ConsensusError::Serialization(Cow::Owned(format!(
99                "Unsupported segwit transaction flag: {flag}"
100            ))));
101        }
102
103        is_segwit = true;
104
105        let (ic2, vl2) = decode_varint(&data[offset..])?;
106        offset += vl2;
107        if ic2 > 1_000_000 {
108            return Err(ConsensusError::Serialization(Cow::Owned(
109                TransactionParseError::InvalidInputCount.to_string(),
110            )));
111        }
112        input_count = ic2;
113    }
114
115    Ok((is_segwit, input_count, offset, implicit_empty_outputs))
116}
117
118/// Serialize a transaction to Bitcoin wire format
119#[inline(always)]
120pub fn serialize_transaction(tx: &Transaction) -> Vec<u8> {
121    let mut result = Vec::new();
122    serialize_transaction_append(&mut result, tx);
123    result
124}
125
126/// Append serialized transaction to buffer (shared logic for into/inner).
127///
128/// When `vin` is empty and `vout` is non-empty, legacy `compact_size(0) || compact_size(n)` would
129/// serialize as `0x00 0x01…`, which our witness-aware deserializer (matching Bitcoin Core with
130/// witnesses allowed) reads as empty `vin` + **flag** `0x01`, not as `vout` count. Emit extended
131/// framing: dummy empty `vin`, flag `0x01`, real input count, then `vout` (see `SerializeTransaction`
132/// in Bitcoin Core).
133#[inline(always)]
134fn serialize_transaction_append(result: &mut Vec<u8>, tx: &Transaction) {
135    result.extend_from_slice(&(tx.version as i32).to_le_bytes());
136
137    if tx.inputs.is_empty() && !tx.outputs.is_empty() {
138        result.extend_from_slice(&encode_varint(0));
139        result.push(0x01);
140        result.extend_from_slice(&encode_varint(tx.inputs.len() as u64));
141    } else {
142        result.extend_from_slice(&encode_varint(tx.inputs.len() as u64));
143    }
144
145    for input in &tx.inputs {
146        result.extend_from_slice(&input.prevout.hash);
147        result.extend_from_slice(&input.prevout.index.to_le_bytes());
148        result.extend_from_slice(&encode_varint(input.script_sig.len() as u64));
149        result.extend_from_slice(&input.script_sig);
150        result.extend_from_slice(&(input.sequence as u32).to_le_bytes());
151    }
152
153    result.extend_from_slice(&encode_varint(tx.outputs.len() as u64));
154
155    for output in &tx.outputs {
156        result.extend_from_slice(&(output.value as u64).to_le_bytes());
157        result.extend_from_slice(&encode_varint(output.script_pubkey.len() as u64));
158        result.extend_from_slice(&output.script_pubkey);
159    }
160
161    result.extend_from_slice(&(tx.lock_time as u32).to_le_bytes());
162}
163
164/// Serialize transaction into an existing buffer
165#[inline(always)]
166pub fn serialize_transaction_into(dst: &mut Vec<u8>, tx: &Transaction) -> usize {
167    dst.clear();
168    serialize_transaction_append(dst, tx);
169    dst.len()
170}
171
172/// Serialize a transaction in SegWit wire format
173pub fn serialize_transaction_with_witness(tx: &Transaction, witnesses: &[Witness]) -> Vec<u8> {
174    assert_eq!(
175        witnesses.len(),
176        tx.inputs.len(),
177        "witness count must match input count"
178    );
179    let mut result = Vec::new();
180    result.extend_from_slice(&(tx.version as i32).to_le_bytes());
181    result.push(0x00);
182    result.push(0x01);
183    result.extend_from_slice(&encode_varint(tx.inputs.len() as u64));
184    for input in &tx.inputs {
185        result.extend_from_slice(&input.prevout.hash);
186        result.extend_from_slice(&input.prevout.index.to_le_bytes());
187        result.extend_from_slice(&encode_varint(input.script_sig.len() as u64));
188        result.extend_from_slice(&input.script_sig);
189        result.extend_from_slice(&(input.sequence as u32).to_le_bytes());
190    }
191    result.extend_from_slice(&encode_varint(tx.outputs.len() as u64));
192    for output in &tx.outputs {
193        result.extend_from_slice(&(output.value as u64).to_le_bytes());
194        result.extend_from_slice(&encode_varint(output.script_pubkey.len() as u64));
195        result.extend_from_slice(&output.script_pubkey);
196    }
197    for witness in witnesses {
198        result.extend_from_slice(&encode_varint(witness.len() as u64));
199        for element in witness {
200            result.extend_from_slice(&encode_varint(element.len() as u64));
201            result.extend_from_slice(element);
202        }
203    }
204    result.extend_from_slice(&(tx.lock_time as u32).to_le_bytes());
205    result
206}
207
208/// Deserialize a transaction from Bitcoin wire format.
209///
210/// Witness data (if present) is validated but not returned; use
211/// [`deserialize_transaction_with_witness`] when witness stacks are needed.
212pub fn deserialize_transaction(data: &[u8]) -> Result<Transaction> {
213    deserialize_transaction_with_witness(data).map(|(tx, _, _)| tx)
214}
215
216// ── Private parsing helpers ───────────────────────────────────────────────────
217
218/// The concrete vector type used for transaction inputs/outputs at runtime.
219/// SmallVec in production builds to avoid heap allocation for ≤2 elements;
220/// plain Vec in non-production builds for simplicity.
221#[cfg(feature = "production")]
222type TxInputVec = SmallVec<[TransactionInput; 2]>;
223#[cfg(not(feature = "production"))]
224type TxInputVec = Vec<TransactionInput>;
225
226#[cfg(feature = "production")]
227type TxOutputVec = SmallVec<[TransactionOutput; 2]>;
228#[cfg(not(feature = "production"))]
229type TxOutputVec = Vec<TransactionOutput>;
230
231/// Parse `count` transaction inputs starting at `*offset`, advancing it in place.
232#[inline]
233fn parse_inputs(data: &[u8], offset: &mut usize, count: u64) -> Result<TxInputVec> {
234    let mut inputs = TxInputVec::new();
235
236    for _ in 0..count {
237        if data.len() < *offset + 36 {
238            return Err(ConsensusError::Serialization(Cow::Owned(
239                TransactionParseError::InsufficientBytes.to_string(),
240            )));
241        }
242        let mut hash = [0u8; 32];
243        hash.copy_from_slice(&data[*offset..*offset + 32]);
244        *offset += 32;
245
246        let index = u32::from_le_bytes([
247            data[*offset],
248            data[*offset + 1],
249            data[*offset + 2],
250            data[*offset + 3],
251        ]);
252        *offset += 4;
253
254        let (script_len, varint_len) = decode_varint(&data[*offset..])?;
255        *offset += varint_len;
256
257        let script_sig_end = checked_slice_end(*offset, script_len)?;
258        if data.len() < script_sig_end {
259            return Err(ConsensusError::Serialization(Cow::Owned(
260                TransactionParseError::InsufficientBytes.to_string(),
261            )));
262        }
263        let script_sig = data[*offset..script_sig_end].to_vec();
264        *offset = script_sig_end;
265
266        if data.len() < *offset + 4 {
267            return Err(ConsensusError::Serialization(Cow::Owned(
268                TransactionParseError::InsufficientBytes.to_string(),
269            )));
270        }
271        let sequence = u32::from_le_bytes([
272            data[*offset],
273            data[*offset + 1],
274            data[*offset + 2],
275            data[*offset + 3],
276        ]) as u64;
277        *offset += 4;
278
279        inputs.push(TransactionInput {
280            prevout: OutPoint { hash, index },
281            script_sig,
282            sequence,
283        });
284    }
285
286    Ok(inputs)
287}
288
289/// Read the output-count varint (or return 0 on implicit-empty-outputs), then parse
290/// all transaction outputs, advancing `*offset` in place.
291#[inline]
292fn parse_outputs(
293    data: &[u8],
294    offset: &mut usize,
295    implicit_empty_outputs: bool,
296) -> Result<TxOutputVec> {
297    let output_count = if implicit_empty_outputs {
298        0
299    } else {
300        let (output_count, varint_len) = decode_varint(&data[*offset..])?;
301        *offset += varint_len;
302
303        if output_count > 1_000_000 {
304            return Err(ConsensusError::Serialization(Cow::Owned(
305                TransactionParseError::InvalidOutputCount.to_string(),
306            )));
307        }
308        output_count
309    };
310
311    let mut outputs = TxOutputVec::new();
312
313    for _ in 0..output_count {
314        if data.len() < *offset + 8 {
315            return Err(ConsensusError::Serialization(Cow::Owned(
316                TransactionParseError::InsufficientBytes.to_string(),
317            )));
318        }
319        let value = i64::from_le_bytes([
320            data[*offset],
321            data[*offset + 1],
322            data[*offset + 2],
323            data[*offset + 3],
324            data[*offset + 4],
325            data[*offset + 5],
326            data[*offset + 6],
327            data[*offset + 7],
328        ]);
329        *offset += 8;
330
331        let (script_len, varint_len) = decode_varint(&data[*offset..])?;
332        *offset += varint_len;
333
334        let script_pubkey_end = checked_slice_end(*offset, script_len)?;
335        if data.len() < script_pubkey_end {
336            return Err(ConsensusError::Serialization(Cow::Owned(
337                TransactionParseError::InsufficientBytes.to_string(),
338            )));
339        }
340        let script_pubkey = data[*offset..script_pubkey_end].to_vec();
341        *offset = script_pubkey_end;
342
343        outputs.push(TransactionOutput { value, script_pubkey });
344    }
345
346    Ok(outputs)
347}
348
349// ─────────────────────────────────────────────────────────────────────────────
350
351/// Deserialize a transaction, returning (tx, bytes_consumed). Convenience wrapper that discards witness data.
352pub fn deserialize_transaction_with_offset(data: &[u8]) -> Result<(Transaction, usize)> {
353    let (tx, _witnesses, bytes_consumed) = deserialize_transaction_with_witness(data)?;
354    Ok((tx, bytes_consumed))
355}
356
357/// Deserialize a transaction from Bitcoin wire format, returning transaction, witness, and bytes consumed
358pub fn deserialize_transaction_with_witness(
359    data: &[u8],
360) -> Result<(Transaction, Vec<Witness>, usize)> {
361    let mut offset = 0;
362
363    if data.len() < offset + 4 {
364        return Err(ConsensusError::Serialization(Cow::Owned(
365            TransactionParseError::InsufficientBytes.to_string(),
366        )));
367    }
368    let version = i32::from_le_bytes([
369        data[offset],
370        data[offset + 1],
371        data[offset + 2],
372        data[offset + 3],
373    ]) as u64;
374    offset += 4;
375
376    let (is_segwit, input_count, mut offset, implicit_empty_outputs) =
377        read_tx_input_count_after_version(data, offset)?;
378
379    let inputs = parse_inputs(data, &mut offset, input_count)?;
380    let outputs = parse_outputs(data, &mut offset, implicit_empty_outputs)?;
381
382    let mut all_witnesses: Vec<Witness> = Vec::new();
383    if is_segwit {
384        for _ in 0..input_count {
385            let (stack_count, varint_len) = decode_varint(&data[offset..])?;
386            offset += varint_len;
387
388            let mut witness_stack: Witness = Vec::new();
389            for _ in 0..stack_count {
390                let (item_len, varint_len) = decode_varint(&data[offset..])?;
391                offset += varint_len;
392
393                let item_end = checked_slice_end(offset, item_len)?;
394                if data.len() < item_end {
395                    return Err(ConsensusError::Serialization(Cow::Owned(
396                        TransactionParseError::InsufficientBytes.to_string(),
397                    )));
398                }
399                witness_stack.push(data[offset..item_end].to_vec());
400                offset = item_end;
401            }
402            all_witnesses.push(witness_stack);
403        }
404    } else {
405        for _ in 0..input_count {
406            all_witnesses.push(Vec::new());
407        }
408    }
409
410    if data.len() < offset + 4 {
411        return Err(ConsensusError::Serialization(Cow::Owned(
412            TransactionParseError::InsufficientBytes.to_string(),
413        )));
414    }
415    let lock_time = u32::from_le_bytes([
416        data[offset],
417        data[offset + 1],
418        data[offset + 2],
419        data[offset + 3],
420    ]) as u64;
421    offset += 4;
422
423    let tx = Transaction {
424        version,
425        inputs,
426        outputs,
427        lock_time,
428    };
429
430    Ok((tx, all_witnesses, offset))
431}
432
#[cfg(test)]
mod tests {
    use super::*;

    /// A one-input, one-output transaction keeps its version, input/output counts and lock time
    /// across serialize → deserialize.
    #[test]
    fn test_serialize_deserialize_round_trip() {
        let original = Transaction {
            version: 1,
            inputs: crate::tx_inputs![TransactionInput {
                prevout: OutPoint {
                    hash: [1; 32],
                    index: 0
                },
                script_sig: vec![0x51],
                sequence: 0xffffffff,
            }],
            outputs: crate::tx_outputs![TransactionOutput {
                value: 5000000000,
                script_pubkey: vec![0x51],
            }],
            lock_time: 0,
        };

        let wire = serialize_transaction(&original);
        let decoded = deserialize_transaction(&wire).unwrap();

        assert_eq!(decoded.version, original.version);
        assert_eq!(decoded.inputs.len(), original.inputs.len());
        assert_eq!(decoded.outputs.len(), original.outputs.len());
        assert_eq!(decoded.lock_time, original.lock_time);
    }

    /// Bitcoin Core: empty `vin` + flag `0` implies empty `vout` without a separate output-count read.
    #[test]
    fn empty_tx_round_trip_matches_double_zero_preamble() {
        let original = Transaction {
            version: 1,
            inputs: crate::tx_inputs![],
            outputs: crate::tx_outputs![],
            lock_time: 0,
        };

        let wire = serialize_transaction(&original);
        let decoded = deserialize_transaction(&wire).unwrap();

        assert_eq!(decoded.version, original.version);
        assert!(decoded.inputs.is_empty());
        assert!(decoded.outputs.is_empty());
        assert_eq!(decoded.lock_time, original.lock_time);
        // version(4) + vin=0 + flags=0 + locktime(4) — two 0x00 bytes after version
        assert_eq!(&wire[4..6], &[0u8, 0u8]);
    }

    /// Zero inputs with one output must be written with extended framing so it reads back intact.
    #[test]
    fn zero_inputs_one_output_round_trips_extended_framing() {
        let original = Transaction {
            version: 2,
            inputs: crate::tx_inputs![],
            outputs: crate::tx_outputs![TransactionOutput {
                value: 1000,
                script_pubkey: vec![0x51],
            }],
            lock_time: 0x11223344,
        };

        let wire = serialize_transaction(&original);
        let decoded = deserialize_transaction(&wire).unwrap();

        assert_eq!(decoded.version, original.version);
        assert!(decoded.inputs.is_empty());
        assert_eq!(decoded.outputs.len(), 1);
        assert_eq!(decoded.outputs[0].value, 1000);
        assert_eq!(decoded.lock_time, original.lock_time);
        // version(4) + 0x00 dummy vin + 0x01 flag + 0x00 real ic + vout count + ...
        assert_eq!(&wire[4..8], &[0u8, 1, 0, 1]);
    }
}
505}