Skip to main content

bee/swarm/
bmt.rs

1//! Binary Merkle Tree (BMT) chunk addressing. Mirrors bee-go's
2//! `pkg/swarm/bmt.go` and `pkg/swarm/chunk.go`.
3//!
4//! A Swarm chunk is `span (8 bytes, little-endian u64) || payload (≤ 4096 bytes)`.
5//! The chunk address is `keccak256(span || bmt_root(zero_padded_payload))`,
6//! where the BMT root is computed by recursively hashing 32-byte
7//! segment pairs over the full 4096-byte payload region.
8
9use sha3::{Digest, Keccak256};
10
11use crate::swarm::errors::Error;
12use crate::swarm::typed_bytes::{Reference, SPAN_LENGTH, Span};
13
14/// Maximum chunk payload size.
15pub const CHUNK_SIZE: usize = 4096;
16/// BMT segment size.
17pub const SEGMENT_SIZE: usize = 32;
18/// Number of leaf segments in the BMT.
19pub const SEGMENTS_COUNT: usize = CHUNK_SIZE / SEGMENT_SIZE;
20
21/// Minimum CAC payload (must be at least one byte).
22pub const MIN_PAYLOAD_SIZE: usize = 1;
23/// Maximum CAC payload (chunk size).
24pub const MAX_PAYLOAD_SIZE: usize = CHUNK_SIZE;
25
26/// `keccak256(input)` as a 32-byte array.
27pub fn keccak256(input: &[u8]) -> [u8; 32] {
28    let mut h = Keccak256::new();
29    h.update(input);
30    let out = h.finalize();
31    let mut a = [0u8; 32];
32    a.copy_from_slice(&out);
33    a
34}
35
36/// Compute the BMT chunk address for a full chunk (`span || payload`).
37///
38/// Returns [`Error::Argument`] if `data` is shorter than the span or
39/// the payload exceeds [`CHUNK_SIZE`].
40pub fn calculate_chunk_address(data: &[u8]) -> Result<[u8; 32], Error> {
41    if data.len() < SPAN_LENGTH {
42        return Err(Error::argument("chunk data shorter than span"));
43    }
44    let span = &data[..SPAN_LENGTH];
45    let payload = &data[SPAN_LENGTH..];
46    if payload.len() > CHUNK_SIZE {
47        return Err(Error::argument("chunk payload exceeds CHUNK_SIZE"));
48    }
49
50    let mut padded = [0u8; CHUNK_SIZE];
51    padded[..payload.len()].copy_from_slice(payload);
52
53    let root = bmt_root(&padded);
54
55    let mut h = Keccak256::new();
56    h.update(span);
57    h.update(root);
58    let out = h.finalize();
59    let mut a = [0u8; 32];
60    a.copy_from_slice(&out);
61    Ok(a)
62}
63
64/// BMT root over a zero-padded 4096-byte payload region.
65fn bmt_root(payload: &[u8; CHUNK_SIZE]) -> [u8; 32] {
66    // Each leaf is a raw 32-byte segment. We then keccak-pair-reduce
67    // until one 32-byte root remains.
68    let mut level: Vec<[u8; 32]> = (0..SEGMENTS_COUNT)
69        .map(|i| {
70            let mut seg = [0u8; SEGMENT_SIZE];
71            seg.copy_from_slice(&payload[i * SEGMENT_SIZE..(i + 1) * SEGMENT_SIZE]);
72            seg
73        })
74        .collect();
75
76    while level.len() > 1 {
77        let next: Vec<[u8; 32]> = level
78            .chunks(2)
79            .map(|pair| {
80                let mut h = Keccak256::new();
81                h.update(pair[0]);
82                h.update(pair[1]);
83                let out = h.finalize();
84                let mut a = [0u8; 32];
85                a.copy_from_slice(&out);
86                a
87            })
88            .collect();
89        level = next;
90    }
91    level[0]
92}
93
94/// Content-addressed chunk: a span + payload pair whose address is the
95/// BMT root over the chunk. Mirrors bee-js `Chunk` (`cac.ts`).
96#[derive(Clone, Debug, PartialEq, Eq)]
97pub struct Chunk {
98    /// BMT chunk address.
99    pub address: Reference,
100    /// Span (8-byte little-endian payload length).
101    pub span: Span,
102    /// Raw payload (1..=4096 bytes).
103    pub payload: Vec<u8>,
104}
105
106impl Chunk {
107    /// Wire form: `span (8 bytes) || payload`. Suitable as the body of
108    /// `POST /chunks/{addr}`.
109    pub fn data(&self) -> Vec<u8> {
110        let mut out = Vec::with_capacity(SPAN_LENGTH + self.payload.len());
111        out.extend_from_slice(self.span.as_bytes());
112        out.extend_from_slice(&self.payload);
113        out
114    }
115}
116
117/// Build a content-addressed chunk: encode the span, compute the BMT
118/// address, return the typed [`Chunk`]. Mirrors bee-js
119/// `makeContentAddressedChunk`.
120pub fn make_content_addressed_chunk(payload: &[u8]) -> Result<Chunk, Error> {
121    if payload.is_empty() || payload.len() > MAX_PAYLOAD_SIZE {
122        return Err(Error::argument(format!(
123            "payload size out of bounds: {}",
124            payload.len()
125        )));
126    }
127    let span = Span::from_u64(payload.len() as u64);
128
129    let mut full = Vec::with_capacity(SPAN_LENGTH + payload.len());
130    full.extend_from_slice(span.as_bytes());
131    full.extend_from_slice(payload);
132
133    let addr = calculate_chunk_address(&full)?;
134    let address = Reference::new(&addr)?;
135    Ok(Chunk {
136        address,
137        span,
138        payload: payload.to_vec(),
139    })
140}
141
142#[cfg(test)]
143mod tests {
144    use super::*;
145
146    /// Cross-checked against bee-go (`MakeContentAddressedChunk`):
147    /// BMT address of "hello world" is this value.
148    #[test]
149    fn cac_address_for_known_payload() {
150        let chunk = make_content_addressed_chunk(b"hello world").unwrap();
151        assert_eq!(
152            chunk.address.to_hex(),
153            "92672a471f4419b255d7cb0cf313474a6f5856fb347c5ece85fb706d644b630f"
154        );
155        assert_eq!(chunk.span.to_u64(), 11);
156        assert_eq!(chunk.payload, b"hello world");
157    }
158
159    #[test]
160    fn rejects_empty_and_oversized_payload() {
161        assert!(make_content_addressed_chunk(&[]).is_err());
162        let oversize = vec![0u8; MAX_PAYLOAD_SIZE + 1];
163        assert!(make_content_addressed_chunk(&oversize).is_err());
164    }
165
166    #[test]
167    fn data_returns_span_then_payload() {
168        let payload = b"test";
169        let chunk = make_content_addressed_chunk(payload).unwrap();
170        let data = chunk.data();
171        assert_eq!(data.len(), SPAN_LENGTH + payload.len());
172        assert_eq!(&data[..SPAN_LENGTH], chunk.span.as_bytes());
173        assert_eq!(&data[SPAN_LENGTH..], payload);
174    }
175
176    #[test]
177    fn calculate_chunk_address_rejects_short_data() {
178        assert!(calculate_chunk_address(&[0u8; 4]).is_err());
179        assert!(calculate_chunk_address(&[0u8; SPAN_LENGTH]).is_ok()); // empty payload OK
180    }
181
182    #[test]
183    fn keccak256_known_value() {
184        // keccak256("") - well-known zero-length hash
185        assert_eq!(
186            hex::encode(keccak256(b"")),
187            "c5d2460186f7233c927e7db2dcc703c0e500b653ca82273b7bfad8045d85a470"
188        );
189    }
190}