Skip to main content

blvm_primitives/serialization/
transaction.rs

1//! Transaction wire format serialization/deserialization
2//!
3//! Bitcoin transaction wire format specification.
4//! Must match Bitcoin protocol serialization exactly for consensus compatibility.
5
6use super::varint::{decode_varint, encode_varint};
7use crate::error::{ConsensusError, Result};
8use crate::types::*;
9use std::borrow::Cow;
10
11#[cfg(feature = "production")]
12use smallvec::SmallVec;
13
/// Reasons a byte buffer can fail to parse as a wire-format transaction.
///
/// The [`Display`](std::fmt::Display) text of a variant is what this module places inside
/// `ConsensusError::Serialization` when reporting a parse failure.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TransactionParseError {
    /// The buffer ended before a complete field could be read.
    InsufficientBytes,
    /// The version field is not acceptable.
    InvalidVersion,
    /// The declared number of inputs is not acceptable.
    InvalidInputCount,
    /// The declared number of outputs is not acceptable.
    InvalidOutputCount,
    /// A declared script (or other variable-length item) length is not acceptable.
    InvalidScriptLength,
    /// The lock-time field is not acceptable.
    InvalidLockTime,
}

impl std::fmt::Display for TransactionParseError {
    /// Writes the fixed, human-readable message associated with the variant.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let message = match self {
            Self::InsufficientBytes => "Insufficient bytes to parse transaction",
            Self::InvalidVersion => "Invalid transaction version",
            Self::InvalidInputCount => "Invalid input count",
            Self::InvalidOutputCount => "Invalid output count",
            Self::InvalidScriptLength => "Invalid script length",
            Self::InvalidLockTime => "Invalid lock time",
        };
        // `write_str` emits the text verbatim, without applying width/padding flags.
        f.write_str(message)
    }
}

// No underlying source error: the default `Error` methods are sufficient.
impl std::error::Error for TransactionParseError {}
41
42#[inline]
43fn checked_slice_end(offset: usize, len: u64) -> Result<usize> {
44    let len = usize::try_from(len).map_err(|_| {
45        ConsensusError::Serialization(Cow::Owned(
46            TransactionParseError::InvalidScriptLength.to_string(),
47        ))
48    })?;
49    offset.checked_add(len).ok_or_else(|| {
50        ConsensusError::Serialization(Cow::Owned(
51            TransactionParseError::InsufficientBytes.to_string(),
52        ))
53    })
54}
55
56/// After the 4-byte `version`, read the compact input count and optional segwit wrapper.
57///
58/// Matches [`UnserializeTransaction`] in Bitcoin Core: the first varint is always the input count.
59/// If it is zero, the next byte is an optional-features flag. `1` means BIP141 extended encoding
60/// (marker was absorbed into the empty `vin` vector); the real input-count varint follows. Flag `0`
61/// means no extension: `vin` and `vout` both stay empty and **no** output-count varint appears on
62/// the wire before `lock_time`.
63///
64/// **Do not** detect segwit by peeking `0x00 0x01` before decoding that first varint: the compact
65/// encoding of the input count is not always a single `0x00` byte, so peeking mis-aligns the stream.
66fn read_tx_input_count_after_version(
67    data: &[u8],
68    mut offset: usize,
69) -> Result<(bool, u64, usize, bool)> {
70    let (mut input_count, varint_len) = decode_varint(&data[offset..])?;
71    offset += varint_len;
72
73    if input_count > 1_000_000 {
74        return Err(ConsensusError::Serialization(Cow::Owned(
75            TransactionParseError::InvalidInputCount.to_string(),
76        )));
77    }
78
79    let mut is_segwit = false;
80    // When true, Bitcoin Core left vout empty without reading a vector length (flag byte was 0).
81    let mut implicit_empty_outputs = false;
82
83    if input_count == 0 {
84        if offset >= data.len() {
85            return Err(ConsensusError::Serialization(Cow::Owned(
86                TransactionParseError::InsufficientBytes.to_string(),
87            )));
88        }
89        let flag = data[offset];
90        offset += 1;
91
92        if flag == 0 {
93            implicit_empty_outputs = true;
94            return Ok((false, 0, offset, implicit_empty_outputs));
95        }
96
97        if flag != 1 {
98            return Err(ConsensusError::Serialization(Cow::Owned(format!(
99                "Unsupported segwit transaction flag: {flag}"
100            ))));
101        }
102
103        is_segwit = true;
104
105        let (ic2, vl2) = decode_varint(&data[offset..])?;
106        offset += vl2;
107        if ic2 > 1_000_000 {
108            return Err(ConsensusError::Serialization(Cow::Owned(
109                TransactionParseError::InvalidInputCount.to_string(),
110            )));
111        }
112        input_count = ic2;
113    }
114
115    Ok((is_segwit, input_count, offset, implicit_empty_outputs))
116}
117
118/// Serialize a transaction to Bitcoin wire format
119#[inline(always)]
120pub fn serialize_transaction(tx: &Transaction) -> Vec<u8> {
121    let mut result = Vec::new();
122    serialize_transaction_append(&mut result, tx);
123    result
124}
125
126/// Append serialized transaction to buffer (shared logic for into/inner).
127#[inline(always)]
128fn serialize_transaction_append(result: &mut Vec<u8>, tx: &Transaction) {
129    result.extend_from_slice(&(tx.version as i32).to_le_bytes());
130    result.extend_from_slice(&encode_varint(tx.inputs.len() as u64));
131
132    for input in &tx.inputs {
133        result.extend_from_slice(&input.prevout.hash);
134        result.extend_from_slice(&input.prevout.index.to_le_bytes());
135        result.extend_from_slice(&encode_varint(input.script_sig.len() as u64));
136        result.extend_from_slice(&input.script_sig);
137        result.extend_from_slice(&(input.sequence as u32).to_le_bytes());
138    }
139
140    result.extend_from_slice(&encode_varint(tx.outputs.len() as u64));
141
142    for output in &tx.outputs {
143        result.extend_from_slice(&(output.value as u64).to_le_bytes());
144        result.extend_from_slice(&encode_varint(output.script_pubkey.len() as u64));
145        result.extend_from_slice(&output.script_pubkey);
146    }
147
148    result.extend_from_slice(&(tx.lock_time as u32).to_le_bytes());
149}
150
151/// Serialize transaction into an existing buffer
152#[inline(always)]
153pub fn serialize_transaction_into(dst: &mut Vec<u8>, tx: &Transaction) -> usize {
154    dst.clear();
155    serialize_transaction_append(dst, tx);
156    dst.len()
157}
158
159/// Serialize a transaction in SegWit wire format
160pub fn serialize_transaction_with_witness(tx: &Transaction, witnesses: &[Witness]) -> Vec<u8> {
161    assert_eq!(
162        witnesses.len(),
163        tx.inputs.len(),
164        "witness count must match input count"
165    );
166    let mut result = Vec::new();
167    result.extend_from_slice(&(tx.version as i32).to_le_bytes());
168    result.push(0x00);
169    result.push(0x01);
170    result.extend_from_slice(&encode_varint(tx.inputs.len() as u64));
171    for input in &tx.inputs {
172        result.extend_from_slice(&input.prevout.hash);
173        result.extend_from_slice(&input.prevout.index.to_le_bytes());
174        result.extend_from_slice(&encode_varint(input.script_sig.len() as u64));
175        result.extend_from_slice(&input.script_sig);
176        result.extend_from_slice(&(input.sequence as u32).to_le_bytes());
177    }
178    result.extend_from_slice(&encode_varint(tx.outputs.len() as u64));
179    for output in &tx.outputs {
180        result.extend_from_slice(&(output.value as u64).to_le_bytes());
181        result.extend_from_slice(&encode_varint(output.script_pubkey.len() as u64));
182        result.extend_from_slice(&output.script_pubkey);
183    }
184    for witness in witnesses {
185        result.extend_from_slice(&encode_varint(witness.len() as u64));
186        for element in witness {
187            result.extend_from_slice(&encode_varint(element.len() as u64));
188            result.extend_from_slice(element);
189        }
190    }
191    result.extend_from_slice(&(tx.lock_time as u32).to_le_bytes());
192    result
193}
194
195/// Deserialize a transaction from Bitcoin wire format
196pub fn deserialize_transaction(data: &[u8]) -> Result<Transaction> {
197    let mut offset = 0;
198
199    if data.len() < offset + 4 {
200        return Err(ConsensusError::Serialization(Cow::Owned(
201            TransactionParseError::InsufficientBytes.to_string(),
202        )));
203    }
204    let version = i32::from_le_bytes([
205        data[offset],
206        data[offset + 1],
207        data[offset + 2],
208        data[offset + 3],
209    ]) as u64;
210    offset += 4;
211
212    let (is_segwit, input_count, mut offset, implicit_empty_outputs) =
213        read_tx_input_count_after_version(data, offset)?;
214
215    #[cfg(feature = "production")]
216    let mut inputs = SmallVec::<[TransactionInput; 2]>::new();
217    #[cfg(not(feature = "production"))]
218    let mut inputs = Vec::new();
219
220    for _ in 0..input_count {
221        if data.len() < offset + 36 {
222            return Err(ConsensusError::Serialization(Cow::Owned(
223                TransactionParseError::InsufficientBytes.to_string(),
224            )));
225        }
226        let mut hash = [0u8; 32];
227        hash.copy_from_slice(&data[offset..offset + 32]);
228        offset += 32;
229
230        let index = u32::from_le_bytes([
231            data[offset],
232            data[offset + 1],
233            data[offset + 2],
234            data[offset + 3],
235        ]);
236        offset += 4;
237
238        let (script_len, varint_len) = decode_varint(&data[offset..])?;
239        offset += varint_len;
240
241        let script_sig_end = checked_slice_end(offset, script_len)?;
242        if data.len() < script_sig_end {
243            return Err(ConsensusError::Serialization(Cow::Owned(
244                TransactionParseError::InsufficientBytes.to_string(),
245            )));
246        }
247        let script_sig = data[offset..script_sig_end].to_vec();
248        offset = script_sig_end;
249
250        if data.len() < offset + 4 {
251            return Err(ConsensusError::Serialization(Cow::Owned(
252                TransactionParseError::InsufficientBytes.to_string(),
253            )));
254        }
255        let sequence = u32::from_le_bytes([
256            data[offset],
257            data[offset + 1],
258            data[offset + 2],
259            data[offset + 3],
260        ]) as u64;
261        offset += 4;
262
263        inputs.push(TransactionInput {
264            prevout: OutPoint { hash, index },
265            script_sig,
266            sequence,
267        });
268    }
269
270    let output_count = if implicit_empty_outputs {
271        0
272    } else {
273        let (output_count, varint_len) = decode_varint(&data[offset..])?;
274        offset += varint_len;
275
276        if output_count > 1000000 {
277            return Err(ConsensusError::Serialization(Cow::Owned(
278                TransactionParseError::InvalidOutputCount.to_string(),
279            )));
280        }
281        output_count
282    };
283
284    #[cfg(feature = "production")]
285    let mut outputs = SmallVec::<[TransactionOutput; 2]>::new();
286    #[cfg(not(feature = "production"))]
287    let mut outputs = Vec::new();
288
289    for _ in 0..output_count {
290        if data.len() < offset + 8 {
291            return Err(ConsensusError::Serialization(Cow::Owned(
292                TransactionParseError::InsufficientBytes.to_string(),
293            )));
294        }
295        let value = i64::from_le_bytes([
296            data[offset],
297            data[offset + 1],
298            data[offset + 2],
299            data[offset + 3],
300            data[offset + 4],
301            data[offset + 5],
302            data[offset + 6],
303            data[offset + 7],
304        ]);
305        offset += 8;
306
307        let (script_len, varint_len) = decode_varint(&data[offset..])?;
308        offset += varint_len;
309
310        let script_pubkey_end = checked_slice_end(offset, script_len)?;
311        if data.len() < script_pubkey_end {
312            return Err(ConsensusError::Serialization(Cow::Owned(
313                TransactionParseError::InsufficientBytes.to_string(),
314            )));
315        }
316        let script_pubkey = data[offset..script_pubkey_end].to_vec();
317        offset = script_pubkey_end;
318
319        outputs.push(TransactionOutput {
320            value,
321            script_pubkey,
322        });
323    }
324
325    if is_segwit {
326        for _ in 0..input_count {
327            let (stack_count, varint_len) = decode_varint(&data[offset..])?;
328            offset += varint_len;
329            for _ in 0..stack_count {
330                let (item_len, varint_len) = decode_varint(&data[offset..])?;
331                offset += varint_len;
332                let item_end = checked_slice_end(offset, item_len)?;
333                if data.len() < item_end {
334                    return Err(ConsensusError::Serialization(Cow::Owned(
335                        TransactionParseError::InsufficientBytes.to_string(),
336                    )));
337                }
338                offset = item_end;
339            }
340        }
341    }
342
343    if data.len() < offset + 4 {
344        return Err(ConsensusError::Serialization(Cow::Owned(
345            TransactionParseError::InsufficientBytes.to_string(),
346        )));
347    }
348    let lock_time = u32::from_le_bytes([
349        data[offset],
350        data[offset + 1],
351        data[offset + 2],
352        data[offset + 3],
353    ]) as u64;
354
355    Ok(Transaction {
356        version,
357        inputs,
358        outputs,
359        lock_time,
360    })
361}
362
363/// Deserialize a transaction, returning (tx, bytes_consumed). Convenience wrapper that discards witness data.
364pub fn deserialize_transaction_with_offset(data: &[u8]) -> Result<(Transaction, usize)> {
365    let (tx, _witnesses, bytes_consumed) = deserialize_transaction_with_witness(data)?;
366    Ok((tx, bytes_consumed))
367}
368
369/// Deserialize a transaction from Bitcoin wire format, returning transaction, witness, and bytes consumed
370pub fn deserialize_transaction_with_witness(
371    data: &[u8],
372) -> Result<(Transaction, Vec<Witness>, usize)> {
373    let mut offset = 0;
374
375    if data.len() < offset + 4 {
376        return Err(ConsensusError::Serialization(Cow::Owned(
377            TransactionParseError::InsufficientBytes.to_string(),
378        )));
379    }
380    let version = i32::from_le_bytes([
381        data[offset],
382        data[offset + 1],
383        data[offset + 2],
384        data[offset + 3],
385    ]) as u64;
386    offset += 4;
387
388    let (is_segwit, input_count, mut offset, implicit_empty_outputs) =
389        read_tx_input_count_after_version(data, offset)?;
390
391    #[cfg(feature = "production")]
392    let mut inputs = SmallVec::<[TransactionInput; 2]>::new();
393    #[cfg(not(feature = "production"))]
394    let mut inputs = Vec::new();
395
396    for _ in 0..input_count {
397        if data.len() < offset + 36 {
398            return Err(ConsensusError::Serialization(Cow::Owned(
399                TransactionParseError::InsufficientBytes.to_string(),
400            )));
401        }
402        let mut hash = [0u8; 32];
403        hash.copy_from_slice(&data[offset..offset + 32]);
404        offset += 32;
405
406        let index = u32::from_le_bytes([
407            data[offset],
408            data[offset + 1],
409            data[offset + 2],
410            data[offset + 3],
411        ]);
412        offset += 4;
413
414        let (script_len, varint_len) = decode_varint(&data[offset..])?;
415        offset += varint_len;
416
417        let script_sig_end = checked_slice_end(offset, script_len)?;
418        if data.len() < script_sig_end {
419            return Err(ConsensusError::Serialization(Cow::Owned(
420                TransactionParseError::InsufficientBytes.to_string(),
421            )));
422        }
423        let script_sig = data[offset..script_sig_end].to_vec();
424        offset = script_sig_end;
425
426        if data.len() < offset + 4 {
427            return Err(ConsensusError::Serialization(Cow::Owned(
428                TransactionParseError::InsufficientBytes.to_string(),
429            )));
430        }
431        let sequence = u32::from_le_bytes([
432            data[offset],
433            data[offset + 1],
434            data[offset + 2],
435            data[offset + 3],
436        ]) as u64;
437        offset += 4;
438
439        inputs.push(TransactionInput {
440            prevout: OutPoint { hash, index },
441            script_sig,
442            sequence,
443        });
444    }
445
446    let output_count = if implicit_empty_outputs {
447        0
448    } else {
449        let (output_count, varint_len) = decode_varint(&data[offset..])?;
450        offset += varint_len;
451
452        if output_count > 1000000 {
453            return Err(ConsensusError::Serialization(Cow::Owned(
454                TransactionParseError::InvalidOutputCount.to_string(),
455            )));
456        }
457        output_count
458    };
459
460    #[cfg(feature = "production")]
461    let mut outputs = SmallVec::<[TransactionOutput; 2]>::new();
462    #[cfg(not(feature = "production"))]
463    let mut outputs = Vec::new();
464
465    for _ in 0..output_count {
466        if data.len() < offset + 8 {
467            return Err(ConsensusError::Serialization(Cow::Owned(
468                TransactionParseError::InsufficientBytes.to_string(),
469            )));
470        }
471        let value = i64::from_le_bytes([
472            data[offset],
473            data[offset + 1],
474            data[offset + 2],
475            data[offset + 3],
476            data[offset + 4],
477            data[offset + 5],
478            data[offset + 6],
479            data[offset + 7],
480        ]);
481        offset += 8;
482
483        let (script_len, varint_len) = decode_varint(&data[offset..])?;
484        offset += varint_len;
485
486        let script_pubkey_end = checked_slice_end(offset, script_len)?;
487        if data.len() < script_pubkey_end {
488            return Err(ConsensusError::Serialization(Cow::Owned(
489                TransactionParseError::InsufficientBytes.to_string(),
490            )));
491        }
492        let script_pubkey = data[offset..script_pubkey_end].to_vec();
493        offset = script_pubkey_end;
494
495        outputs.push(TransactionOutput {
496            value,
497            script_pubkey,
498        });
499    }
500
501    let mut all_witnesses: Vec<Witness> = Vec::new();
502    if is_segwit {
503        for _ in 0..input_count {
504            let (stack_count, varint_len) = decode_varint(&data[offset..])?;
505            offset += varint_len;
506
507            let mut witness_stack: Witness = Vec::new();
508            for _ in 0..stack_count {
509                let (item_len, varint_len) = decode_varint(&data[offset..])?;
510                offset += varint_len;
511
512                let item_end = checked_slice_end(offset, item_len)?;
513                if data.len() < item_end {
514                    return Err(ConsensusError::Serialization(Cow::Owned(
515                        TransactionParseError::InsufficientBytes.to_string(),
516                    )));
517                }
518                witness_stack.push(data[offset..item_end].to_vec());
519                offset = item_end;
520            }
521            all_witnesses.push(witness_stack);
522        }
523    } else {
524        for _ in 0..input_count {
525            all_witnesses.push(Vec::new());
526        }
527    }
528
529    if data.len() < offset + 4 {
530        return Err(ConsensusError::Serialization(Cow::Owned(
531            TransactionParseError::InsufficientBytes.to_string(),
532        )));
533    }
534    let lock_time = u32::from_le_bytes([
535        data[offset],
536        data[offset + 1],
537        data[offset + 2],
538        data[offset + 3],
539    ]) as u64;
540    offset += 4;
541
542    let tx = Transaction {
543        version,
544        inputs,
545        outputs,
546        lock_time,
547    };
548
549    Ok((tx, all_witnesses, offset))
550}
551
#[cfg(test)]
mod tests {
    use super::*;

    /// A one-input, one-output legacy transaction survives serialize -> deserialize.
    #[test]
    fn test_serialize_deserialize_round_trip() {
        let tx = Transaction {
            version: 1,
            inputs: crate::tx_inputs![TransactionInput {
                prevout: OutPoint {
                    hash: [1; 32],
                    index: 0
                },
                script_sig: vec![0x51],
                sequence: 0xffffffff,
            }],
            outputs: crate::tx_outputs![TransactionOutput {
                value: 5000000000,
                script_pubkey: vec![0x51],
            }],
            lock_time: 0,
        };

        let serialized = serialize_transaction(&tx);
        let deserialized = deserialize_transaction(&serialized).unwrap();

        assert_eq!(deserialized.version, tx.version);
        assert_eq!(deserialized.inputs.len(), tx.inputs.len());
        assert_eq!(deserialized.outputs.len(), tx.outputs.len());
        assert_eq!(deserialized.lock_time, tx.lock_time);

        // Counts alone would not notice a corrupted hash, script, value or sequence;
        // re-encoding the parsed transaction must reproduce the original bytes exactly.
        assert_eq!(serialize_transaction(&deserialized), serialized);
    }

    /// Bitcoin Core: empty `vin` + flag `0` implies empty `vout` without a separate output-count read.
    #[test]
    fn empty_tx_round_trip_matches_double_zero_preamble() {
        let tx = Transaction {
            version: 1,
            inputs: crate::tx_inputs![],
            outputs: crate::tx_outputs![],
            lock_time: 0,
        };
        let bytes = serialize_transaction(&tx);
        let back = deserialize_transaction(&bytes).unwrap();
        assert_eq!(back.version, tx.version);
        assert!(back.inputs.is_empty());
        assert!(back.outputs.is_empty());
        assert_eq!(back.lock_time, tx.lock_time);
        // version(4) + vin=0 + flags=0 + locktime(4) — two 0x00 bytes after version
        assert_eq!(&bytes[4..6], &[0u8, 0u8]);
        // Nothing else may be on the wire: 4 + 1 + 1 + 4 bytes in total.
        assert_eq!(bytes.len(), 10);
        // The parsed value must encode back to the same bytes.
        assert_eq!(serialize_transaction(&back), bytes);
    }
}