Skip to main content

blvm_primitives/serialization/
transaction.rs

1//! Transaction wire format serialization/deserialization
2//!
3//! Bitcoin transaction wire format specification.
4//! Must match Bitcoin protocol serialization exactly for consensus compatibility.
5
6use super::varint::{decode_varint, encode_varint};
7use crate::error::{ConsensusError, Result};
8use crate::types::*;
9use std::borrow::Cow;
10
11#[cfg(feature = "production")]
12use smallvec::SmallVec;
13
/// Reasons a byte buffer can fail to parse as a transaction.
///
/// The parsers in this module render a variant with `to_string()` and wrap the resulting text
/// in `ConsensusError::Serialization`, so the `Display` strings below are what callers see.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TransactionParseError {
    /// The buffer ended before a field could be read in full.
    InsufficientBytes,
    /// Renders as "Invalid transaction version".
    InvalidVersion,
    /// The declared input count was rejected (the parsers cap it at 1,000,000).
    InvalidInputCount,
    /// The declared output count was rejected (the parsers cap it at 1,000,000).
    InvalidOutputCount,
    /// A declared byte length does not fit in `usize`.
    InvalidScriptLength,
    /// Renders as "Invalid lock time".
    InvalidLockTime,
}

impl std::fmt::Display for TransactionParseError {
    /// Writes the fixed, human-readable message associated with the variant.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let message = match self {
            Self::InsufficientBytes => "Insufficient bytes to parse transaction",
            Self::InvalidVersion => "Invalid transaction version",
            Self::InvalidInputCount => "Invalid input count",
            Self::InvalidOutputCount => "Invalid output count",
            Self::InvalidScriptLength => "Invalid script length",
            Self::InvalidLockTime => "Invalid lock time",
        };
        f.write_str(message)
    }
}

impl std::error::Error for TransactionParseError {}
41
42#[inline]
43fn checked_slice_end(offset: usize, len: u64) -> Result<usize> {
44    let len = usize::try_from(len).map_err(|_| {
45        ConsensusError::Serialization(Cow::Owned(
46            TransactionParseError::InvalidScriptLength.to_string(),
47        ))
48    })?;
49    offset.checked_add(len).ok_or_else(|| {
50        ConsensusError::Serialization(Cow::Owned(
51            TransactionParseError::InsufficientBytes.to_string(),
52        ))
53    })
54}
55
56/// After the 4-byte `version`, read the compact input count and optional segwit wrapper.
57///
58/// Matches [`UnserializeTransaction`] in Bitcoin Core: the first varint is always the input count.
59/// If it is zero, the next byte is an optional-features flag. `1` means BIP141 extended encoding
60/// (marker was absorbed into the empty `vin` vector); the real input-count varint follows. Flag `0`
61/// means no extension: `vin` and `vout` both stay empty and **no** output-count varint appears on
62/// the wire before `lock_time`.
63///
64/// **Do not** detect segwit by peeking `0x00 0x01` before decoding that first varint: the compact
65/// encoding of the input count is not always a single `0x00` byte, so peeking mis-aligns the stream.
66fn read_tx_input_count_after_version(
67    data: &[u8],
68    mut offset: usize,
69) -> Result<(bool, u64, usize, bool)> {
70    let (mut input_count, varint_len) = decode_varint(&data[offset..])?;
71    offset += varint_len;
72
73    if input_count > 1_000_000 {
74        return Err(ConsensusError::Serialization(Cow::Owned(
75            TransactionParseError::InvalidInputCount.to_string(),
76        )));
77    }
78
79    let mut is_segwit = false;
80    // When true, Bitcoin Core left vout empty without reading a vector length (flag byte was 0).
81    let mut implicit_empty_outputs = false;
82
83    if input_count == 0 {
84        if offset >= data.len() {
85            return Err(ConsensusError::Serialization(Cow::Owned(
86                TransactionParseError::InsufficientBytes.to_string(),
87            )));
88        }
89        let flag = data[offset];
90        offset += 1;
91
92        if flag == 0 {
93            implicit_empty_outputs = true;
94            return Ok((false, 0, offset, implicit_empty_outputs));
95        }
96
97        if flag != 1 {
98            return Err(ConsensusError::Serialization(Cow::Owned(format!(
99                "Unsupported segwit transaction flag: {flag}"
100            ))));
101        }
102
103        is_segwit = true;
104
105        let (ic2, vl2) = decode_varint(&data[offset..])?;
106        offset += vl2;
107        if ic2 > 1_000_000 {
108            return Err(ConsensusError::Serialization(Cow::Owned(
109                TransactionParseError::InvalidInputCount.to_string(),
110            )));
111        }
112        input_count = ic2;
113    }
114
115    Ok((is_segwit, input_count, offset, implicit_empty_outputs))
116}
117
118/// Serialize a transaction to Bitcoin wire format
119#[inline(always)]
120pub fn serialize_transaction(tx: &Transaction) -> Vec<u8> {
121    let mut result = Vec::new();
122    serialize_transaction_append(&mut result, tx);
123    result
124}
125
126/// Append serialized transaction to buffer (shared logic for into/inner).
127///
128/// When `vin` is empty and `vout` is non-empty, legacy `compact_size(0) || compact_size(n)` would
129/// serialize as `0x00 0x01…`, which our witness-aware deserializer (matching Bitcoin Core with
130/// witnesses allowed) reads as empty `vin` + **flag** `0x01`, not as `vout` count. Emit extended
131/// framing: dummy empty `vin`, flag `0x01`, real input count, then `vout` (see `SerializeTransaction`
132/// in Bitcoin Core).
133#[inline(always)]
134fn serialize_transaction_append(result: &mut Vec<u8>, tx: &Transaction) {
135    result.extend_from_slice(&(tx.version as i32).to_le_bytes());
136
137    if tx.inputs.is_empty() && !tx.outputs.is_empty() {
138        result.extend_from_slice(&encode_varint(0));
139        result.push(0x01);
140        result.extend_from_slice(&encode_varint(tx.inputs.len() as u64));
141    } else {
142        result.extend_from_slice(&encode_varint(tx.inputs.len() as u64));
143    }
144
145    for input in &tx.inputs {
146        result.extend_from_slice(&input.prevout.hash);
147        result.extend_from_slice(&input.prevout.index.to_le_bytes());
148        result.extend_from_slice(&encode_varint(input.script_sig.len() as u64));
149        result.extend_from_slice(&input.script_sig);
150        result.extend_from_slice(&(input.sequence as u32).to_le_bytes());
151    }
152
153    result.extend_from_slice(&encode_varint(tx.outputs.len() as u64));
154
155    for output in &tx.outputs {
156        result.extend_from_slice(&(output.value as u64).to_le_bytes());
157        result.extend_from_slice(&encode_varint(output.script_pubkey.len() as u64));
158        result.extend_from_slice(&output.script_pubkey);
159    }
160
161    result.extend_from_slice(&(tx.lock_time as u32).to_le_bytes());
162}
163
164/// Serialize transaction into an existing buffer
165#[inline(always)]
166pub fn serialize_transaction_into(dst: &mut Vec<u8>, tx: &Transaction) -> usize {
167    dst.clear();
168    serialize_transaction_append(dst, tx);
169    dst.len()
170}
171
172/// Serialize a transaction in SegWit wire format
173pub fn serialize_transaction_with_witness(tx: &Transaction, witnesses: &[Witness]) -> Vec<u8> {
174    assert_eq!(
175        witnesses.len(),
176        tx.inputs.len(),
177        "witness count must match input count"
178    );
179    let mut result = Vec::new();
180    result.extend_from_slice(&(tx.version as i32).to_le_bytes());
181    result.push(0x00);
182    result.push(0x01);
183    result.extend_from_slice(&encode_varint(tx.inputs.len() as u64));
184    for input in &tx.inputs {
185        result.extend_from_slice(&input.prevout.hash);
186        result.extend_from_slice(&input.prevout.index.to_le_bytes());
187        result.extend_from_slice(&encode_varint(input.script_sig.len() as u64));
188        result.extend_from_slice(&input.script_sig);
189        result.extend_from_slice(&(input.sequence as u32).to_le_bytes());
190    }
191    result.extend_from_slice(&encode_varint(tx.outputs.len() as u64));
192    for output in &tx.outputs {
193        result.extend_from_slice(&(output.value as u64).to_le_bytes());
194        result.extend_from_slice(&encode_varint(output.script_pubkey.len() as u64));
195        result.extend_from_slice(&output.script_pubkey);
196    }
197    for witness in witnesses {
198        result.extend_from_slice(&encode_varint(witness.len() as u64));
199        for element in witness {
200            result.extend_from_slice(&encode_varint(element.len() as u64));
201            result.extend_from_slice(element);
202        }
203    }
204    result.extend_from_slice(&(tx.lock_time as u32).to_le_bytes());
205    result
206}
207
208/// Deserialize a transaction from Bitcoin wire format
209pub fn deserialize_transaction(data: &[u8]) -> Result<Transaction> {
210    let mut offset = 0;
211
212    if data.len() < offset + 4 {
213        return Err(ConsensusError::Serialization(Cow::Owned(
214            TransactionParseError::InsufficientBytes.to_string(),
215        )));
216    }
217    let version = i32::from_le_bytes([
218        data[offset],
219        data[offset + 1],
220        data[offset + 2],
221        data[offset + 3],
222    ]) as u64;
223    offset += 4;
224
225    let (is_segwit, input_count, mut offset, implicit_empty_outputs) =
226        read_tx_input_count_after_version(data, offset)?;
227
228    #[cfg(feature = "production")]
229    let mut inputs = SmallVec::<[TransactionInput; 2]>::new();
230    #[cfg(not(feature = "production"))]
231    let mut inputs = Vec::new();
232
233    for _ in 0..input_count {
234        if data.len() < offset + 36 {
235            return Err(ConsensusError::Serialization(Cow::Owned(
236                TransactionParseError::InsufficientBytes.to_string(),
237            )));
238        }
239        let mut hash = [0u8; 32];
240        hash.copy_from_slice(&data[offset..offset + 32]);
241        offset += 32;
242
243        let index = u32::from_le_bytes([
244            data[offset],
245            data[offset + 1],
246            data[offset + 2],
247            data[offset + 3],
248        ]);
249        offset += 4;
250
251        let (script_len, varint_len) = decode_varint(&data[offset..])?;
252        offset += varint_len;
253
254        let script_sig_end = checked_slice_end(offset, script_len)?;
255        if data.len() < script_sig_end {
256            return Err(ConsensusError::Serialization(Cow::Owned(
257                TransactionParseError::InsufficientBytes.to_string(),
258            )));
259        }
260        let script_sig = data[offset..script_sig_end].to_vec();
261        offset = script_sig_end;
262
263        if data.len() < offset + 4 {
264            return Err(ConsensusError::Serialization(Cow::Owned(
265                TransactionParseError::InsufficientBytes.to_string(),
266            )));
267        }
268        let sequence = u32::from_le_bytes([
269            data[offset],
270            data[offset + 1],
271            data[offset + 2],
272            data[offset + 3],
273        ]) as u64;
274        offset += 4;
275
276        inputs.push(TransactionInput {
277            prevout: OutPoint { hash, index },
278            script_sig,
279            sequence,
280        });
281    }
282
283    let output_count = if implicit_empty_outputs {
284        0
285    } else {
286        let (output_count, varint_len) = decode_varint(&data[offset..])?;
287        offset += varint_len;
288
289        if output_count > 1000000 {
290            return Err(ConsensusError::Serialization(Cow::Owned(
291                TransactionParseError::InvalidOutputCount.to_string(),
292            )));
293        }
294        output_count
295    };
296
297    #[cfg(feature = "production")]
298    let mut outputs = SmallVec::<[TransactionOutput; 2]>::new();
299    #[cfg(not(feature = "production"))]
300    let mut outputs = Vec::new();
301
302    for _ in 0..output_count {
303        if data.len() < offset + 8 {
304            return Err(ConsensusError::Serialization(Cow::Owned(
305                TransactionParseError::InsufficientBytes.to_string(),
306            )));
307        }
308        let value = i64::from_le_bytes([
309            data[offset],
310            data[offset + 1],
311            data[offset + 2],
312            data[offset + 3],
313            data[offset + 4],
314            data[offset + 5],
315            data[offset + 6],
316            data[offset + 7],
317        ]);
318        offset += 8;
319
320        let (script_len, varint_len) = decode_varint(&data[offset..])?;
321        offset += varint_len;
322
323        let script_pubkey_end = checked_slice_end(offset, script_len)?;
324        if data.len() < script_pubkey_end {
325            return Err(ConsensusError::Serialization(Cow::Owned(
326                TransactionParseError::InsufficientBytes.to_string(),
327            )));
328        }
329        let script_pubkey = data[offset..script_pubkey_end].to_vec();
330        offset = script_pubkey_end;
331
332        outputs.push(TransactionOutput {
333            value,
334            script_pubkey,
335        });
336    }
337
338    if is_segwit {
339        for _ in 0..input_count {
340            let (stack_count, varint_len) = decode_varint(&data[offset..])?;
341            offset += varint_len;
342            for _ in 0..stack_count {
343                let (item_len, varint_len) = decode_varint(&data[offset..])?;
344                offset += varint_len;
345                let item_end = checked_slice_end(offset, item_len)?;
346                if data.len() < item_end {
347                    return Err(ConsensusError::Serialization(Cow::Owned(
348                        TransactionParseError::InsufficientBytes.to_string(),
349                    )));
350                }
351                offset = item_end;
352            }
353        }
354    }
355
356    if data.len() < offset + 4 {
357        return Err(ConsensusError::Serialization(Cow::Owned(
358            TransactionParseError::InsufficientBytes.to_string(),
359        )));
360    }
361    let lock_time = u32::from_le_bytes([
362        data[offset],
363        data[offset + 1],
364        data[offset + 2],
365        data[offset + 3],
366    ]) as u64;
367
368    Ok(Transaction {
369        version,
370        inputs,
371        outputs,
372        lock_time,
373    })
374}
375
376/// Deserialize a transaction, returning (tx, bytes_consumed). Convenience wrapper that discards witness data.
377pub fn deserialize_transaction_with_offset(data: &[u8]) -> Result<(Transaction, usize)> {
378    let (tx, _witnesses, bytes_consumed) = deserialize_transaction_with_witness(data)?;
379    Ok((tx, bytes_consumed))
380}
381
382/// Deserialize a transaction from Bitcoin wire format, returning transaction, witness, and bytes consumed
383pub fn deserialize_transaction_with_witness(
384    data: &[u8],
385) -> Result<(Transaction, Vec<Witness>, usize)> {
386    let mut offset = 0;
387
388    if data.len() < offset + 4 {
389        return Err(ConsensusError::Serialization(Cow::Owned(
390            TransactionParseError::InsufficientBytes.to_string(),
391        )));
392    }
393    let version = i32::from_le_bytes([
394        data[offset],
395        data[offset + 1],
396        data[offset + 2],
397        data[offset + 3],
398    ]) as u64;
399    offset += 4;
400
401    let (is_segwit, input_count, mut offset, implicit_empty_outputs) =
402        read_tx_input_count_after_version(data, offset)?;
403
404    #[cfg(feature = "production")]
405    let mut inputs = SmallVec::<[TransactionInput; 2]>::new();
406    #[cfg(not(feature = "production"))]
407    let mut inputs = Vec::new();
408
409    for _ in 0..input_count {
410        if data.len() < offset + 36 {
411            return Err(ConsensusError::Serialization(Cow::Owned(
412                TransactionParseError::InsufficientBytes.to_string(),
413            )));
414        }
415        let mut hash = [0u8; 32];
416        hash.copy_from_slice(&data[offset..offset + 32]);
417        offset += 32;
418
419        let index = u32::from_le_bytes([
420            data[offset],
421            data[offset + 1],
422            data[offset + 2],
423            data[offset + 3],
424        ]);
425        offset += 4;
426
427        let (script_len, varint_len) = decode_varint(&data[offset..])?;
428        offset += varint_len;
429
430        let script_sig_end = checked_slice_end(offset, script_len)?;
431        if data.len() < script_sig_end {
432            return Err(ConsensusError::Serialization(Cow::Owned(
433                TransactionParseError::InsufficientBytes.to_string(),
434            )));
435        }
436        let script_sig = data[offset..script_sig_end].to_vec();
437        offset = script_sig_end;
438
439        if data.len() < offset + 4 {
440            return Err(ConsensusError::Serialization(Cow::Owned(
441                TransactionParseError::InsufficientBytes.to_string(),
442            )));
443        }
444        let sequence = u32::from_le_bytes([
445            data[offset],
446            data[offset + 1],
447            data[offset + 2],
448            data[offset + 3],
449        ]) as u64;
450        offset += 4;
451
452        inputs.push(TransactionInput {
453            prevout: OutPoint { hash, index },
454            script_sig,
455            sequence,
456        });
457    }
458
459    let output_count = if implicit_empty_outputs {
460        0
461    } else {
462        let (output_count, varint_len) = decode_varint(&data[offset..])?;
463        offset += varint_len;
464
465        if output_count > 1000000 {
466            return Err(ConsensusError::Serialization(Cow::Owned(
467                TransactionParseError::InvalidOutputCount.to_string(),
468            )));
469        }
470        output_count
471    };
472
473    #[cfg(feature = "production")]
474    let mut outputs = SmallVec::<[TransactionOutput; 2]>::new();
475    #[cfg(not(feature = "production"))]
476    let mut outputs = Vec::new();
477
478    for _ in 0..output_count {
479        if data.len() < offset + 8 {
480            return Err(ConsensusError::Serialization(Cow::Owned(
481                TransactionParseError::InsufficientBytes.to_string(),
482            )));
483        }
484        let value = i64::from_le_bytes([
485            data[offset],
486            data[offset + 1],
487            data[offset + 2],
488            data[offset + 3],
489            data[offset + 4],
490            data[offset + 5],
491            data[offset + 6],
492            data[offset + 7],
493        ]);
494        offset += 8;
495
496        let (script_len, varint_len) = decode_varint(&data[offset..])?;
497        offset += varint_len;
498
499        let script_pubkey_end = checked_slice_end(offset, script_len)?;
500        if data.len() < script_pubkey_end {
501            return Err(ConsensusError::Serialization(Cow::Owned(
502                TransactionParseError::InsufficientBytes.to_string(),
503            )));
504        }
505        let script_pubkey = data[offset..script_pubkey_end].to_vec();
506        offset = script_pubkey_end;
507
508        outputs.push(TransactionOutput {
509            value,
510            script_pubkey,
511        });
512    }
513
514    let mut all_witnesses: Vec<Witness> = Vec::new();
515    if is_segwit {
516        for _ in 0..input_count {
517            let (stack_count, varint_len) = decode_varint(&data[offset..])?;
518            offset += varint_len;
519
520            let mut witness_stack: Witness = Vec::new();
521            for _ in 0..stack_count {
522                let (item_len, varint_len) = decode_varint(&data[offset..])?;
523                offset += varint_len;
524
525                let item_end = checked_slice_end(offset, item_len)?;
526                if data.len() < item_end {
527                    return Err(ConsensusError::Serialization(Cow::Owned(
528                        TransactionParseError::InsufficientBytes.to_string(),
529                    )));
530                }
531                witness_stack.push(data[offset..item_end].to_vec());
532                offset = item_end;
533            }
534            all_witnesses.push(witness_stack);
535        }
536    } else {
537        for _ in 0..input_count {
538            all_witnesses.push(Vec::new());
539        }
540    }
541
542    if data.len() < offset + 4 {
543        return Err(ConsensusError::Serialization(Cow::Owned(
544            TransactionParseError::InsufficientBytes.to_string(),
545        )));
546    }
547    let lock_time = u32::from_le_bytes([
548        data[offset],
549        data[offset + 1],
550        data[offset + 2],
551        data[offset + 3],
552    ]) as u64;
553    offset += 4;
554
555    let tx = Transaction {
556        version,
557        inputs,
558        outputs,
559        lock_time,
560    };
561
562    Ok((tx, all_witnesses, offset))
563}
564
#[cfg(test)]
mod tests {
    use super::*;

    /// A one-input, one-output transaction survives serialize -> deserialize.
    #[test]
    fn test_serialize_deserialize_round_trip() {
        let original = Transaction {
            version: 1,
            inputs: crate::tx_inputs![TransactionInput {
                prevout: OutPoint {
                    hash: [1; 32],
                    index: 0
                },
                script_sig: vec![0x51],
                sequence: 0xffffffff,
            }],
            outputs: crate::tx_outputs![TransactionOutput {
                value: 5000000000,
                script_pubkey: vec![0x51],
            }],
            lock_time: 0,
        };

        let wire = serialize_transaction(&original);
        let decoded = deserialize_transaction(&wire).unwrap();

        assert_eq!(decoded.version, original.version);
        assert_eq!(decoded.inputs.len(), original.inputs.len());
        assert_eq!(decoded.outputs.len(), original.outputs.len());
        assert_eq!(decoded.lock_time, original.lock_time);
    }

    /// Bitcoin Core: empty `vin` + flag `0` implies empty `vout` without a separate output-count
    /// read.
    #[test]
    fn empty_tx_round_trip_matches_double_zero_preamble() {
        let original = Transaction {
            version: 1,
            inputs: crate::tx_inputs![],
            outputs: crate::tx_outputs![],
            lock_time: 0,
        };

        let wire = serialize_transaction(&original);
        let decoded = deserialize_transaction(&wire).unwrap();

        assert_eq!(decoded.version, original.version);
        assert!(decoded.inputs.is_empty());
        assert!(decoded.outputs.is_empty());
        assert_eq!(decoded.lock_time, original.lock_time);
        // Layout: version(4) + vin=0 + flags=0 + locktime(4), i.e. two 0x00 bytes right after
        // the version.
        assert_eq!(&wire[4..6], &[0u8, 0u8]);
    }

    /// Empty `vin` with a non-empty `vout` is written with extended framing and reads back.
    #[test]
    fn zero_inputs_one_output_round_trips_extended_framing() {
        let original = Transaction {
            version: 2,
            inputs: crate::tx_inputs![],
            outputs: crate::tx_outputs![TransactionOutput {
                value: 1000,
                script_pubkey: vec![0x51],
            }],
            lock_time: 0x11223344,
        };

        let wire = serialize_transaction(&original);
        let decoded = deserialize_transaction(&wire).unwrap();

        assert_eq!(decoded.version, original.version);
        assert!(decoded.inputs.is_empty());
        assert_eq!(decoded.outputs.len(), 1);
        assert_eq!(decoded.outputs[0].value, 1000);
        assert_eq!(decoded.lock_time, original.lock_time);
        // Layout after version(4): 0x00 dummy vin, 0x01 flag, 0x00 real input count, then the
        // vout count.
        assert_eq!(&wire[4..8], &[0u8, 1, 0, 1]);
    }
}