smoldot 1.1.0

Primitives to build a client for Substrate-based blockchains.

Documentation
// Smoldot
// Copyright (C) 2019-2022  Parity Technologies (UK) Ltd.
// SPDX-License-Identifier: GPL-3.0-or-later WITH Classpath-exception-2.0

// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

// You should have received a copy of the GNU General Public License
// along with this program.  If not, see <http://www.gnu.org/licenses/>.

use super::statement::Topic;
use alloc::vec::Vec;
use core::hash::{BuildHasher, Hasher};
use fastbloom::DefaultHasher as BloomDefaultHasher;

/// Maximum number of bits allowed in a bloom filter received from the network.
/// 1 MiB (the notification size budget) = 8_388_608 bits.
const MAX_BLOOM_BITS: usize = 1024 * 1024 * 8;

/// Maximum number of hash functions allowed in a bloom filter received from the network.
/// Optimal hash count is `(bits / items) * ln(2)`. With the minimum allocation of 64 bits
/// and 1 expected item this yields ~44, so the limit must be at least that high. 64 covers all
/// practical configurations while preventing CPU abuse from peers.
pub(crate) const MAX_NUM_HASHES: u32 = 64;

/// Wraps `fastbloom::DefaultHasher` to force `write_usize`/`write_isize` to always emit
/// 8-byte LE values, ensuring identical bloom filter bits on wasm32 and 64-bit native targets.
#[derive(Clone, Debug)]
struct PortableBuildHasher(BloomDefaultHasher);

impl PortableBuildHasher {
    /// Constructs the hasher factory from a 128-bit seed, serialized as
    /// little-endian bytes so that seed derivation itself is platform-independent.
    fn seeded(seed: u128) -> Self {
        let seed_bytes = seed.to_le_bytes();
        PortableBuildHasher(BloomDefaultHasher::seeded(&seed_bytes))
    }
}

impl BuildHasher for PortableBuildHasher {
    type Hasher = PortableHasher;

    /// Produces a fresh hasher state. The portability overrides live in
    /// [`PortableHasher`]'s `Hasher` implementation; this merely wraps the
    /// inner hasher's state.
    fn build_hasher(&self) -> Self::Hasher {
        let inner = self.0.build_hasher();
        PortableHasher(inner)
    }
}

/// Hasher state returned by [`PortableBuildHasher`].  Delegates everything to
/// the inner SipHash-based hasher but overrides `write_usize` and `write_isize`
/// so that platform-width integers are always 8 bytes regardless of pointer
/// width. In practice `usize` values reach the hasher through the length
/// prefixes emitted by the standard library's `Hash` implementations for
/// slices/arrays, so this override is what keeps filter bits stable across
/// pointer widths.
#[derive(Clone)]
struct PortableHasher(<BloomDefaultHasher as BuildHasher>::Hasher);

impl Hasher for PortableHasher {
    #[inline]
    fn finish(&self) -> u64 {
        self.0.finish()
    }

    #[inline]
    fn write(&mut self, bytes: &[u8]) {
        self.0.write(bytes);
    }

    // The default `write_u*`/`write_i*` methods of `core::hash::Hasher` forward
    // to `write` using *native-endian* bytes. They are overridden below with
    // explicit little-endian encodings so that the byte stream fed to the inner
    // hasher — and therefore the bloom filter bits — is identical on big-endian
    // targets as well, not only on the little-endian wasm32/x86-64 pair. On
    // little-endian targets these overrides emit exactly the same bytes as the
    // defaults, so previously-generated filters are unaffected.

    #[inline]
    fn write_u16(&mut self, i: u16) {
        self.0.write(&i.to_le_bytes());
    }

    #[inline]
    fn write_u32(&mut self, i: u32) {
        self.0.write(&i.to_le_bytes());
    }

    #[inline]
    fn write_u64(&mut self, i: u64) {
        self.0.write(&i.to_le_bytes());
    }

    #[inline]
    fn write_u128(&mut self, i: u128) {
        self.0.write(&i.to_le_bytes());
    }

    #[inline]
    fn write_i16(&mut self, i: i16) {
        self.0.write(&i.to_le_bytes());
    }

    #[inline]
    fn write_i32(&mut self, i: i32) {
        self.0.write(&i.to_le_bytes());
    }

    #[inline]
    fn write_i64(&mut self, i: i64) {
        self.0.write(&i.to_le_bytes());
    }

    #[inline]
    fn write_i128(&mut self, i: i128) {
        self.0.write(&i.to_le_bytes());
    }

    #[inline]
    fn write_usize(&mut self, i: usize) {
        // Always write as 8-byte little-endian so that `wasm32` (4-byte
        // usize) and 64-bit targets produce the same hash.
        self.0.write(&(i as u64).to_le_bytes());
    }

    #[inline]
    fn write_isize(&mut self, i: isize) {
        // Always write as 8-byte little-endian for the same reason as
        // `write_usize`.
        self.0.write(&(i as i64).to_le_bytes());
    }
}

/// Wire representation of a bloom filter.
///
/// Layout (all integers little-endian): `seed` (16 bytes) ++ `num_hashes`
/// (4 bytes) ++ SCALE-compact word count ++ each filter word as 8 bytes.
struct EncodedBloomFilter {
    /// Seed used for hashing items in the bloom filter. Needed for the peer to reconstruct the same
    /// bloom filter.
    seed: u128,
    /// Number of hash functions used in the bloom filter. Needed for the peer to reconstruct the
    /// same bloom filter.
    num_hashes: u32,
    /// Bloom filter bits as a vector of u64. The bloom filter is reconstructed by the peer using
    /// these bits.
    bits: Vec<u64>,
}

impl EncodedBloomFilter {
    /// Serializes the filter as: `seed` (16 bytes LE) ++ `num_hashes` (4 bytes LE) ++
    /// SCALE-compact word count ++ each `u64` word as 8 bytes LE.
    fn encode_to_vec(&self) -> Vec<u8> {
        let len_prefix = crate::util::encode_scale_compact_usize(self.bits.len());
        // Exact size is known up-front; reserve once instead of growing.
        // `bits.len() * 8` cannot overflow: the `Vec<u64>` already occupies
        // that many bytes in memory.
        let mut out =
            Vec::with_capacity(16 + 4 + len_prefix.as_ref().len() + self.bits.len() * 8);
        out.extend_from_slice(&self.seed.to_le_bytes());
        out.extend_from_slice(&self.num_hashes.to_le_bytes());
        out.extend_from_slice(len_prefix.as_ref());
        for &word in &self.bits {
            out.extend_from_slice(&word.to_le_bytes());
        }
        out
    }

    /// Parses the wire format produced by [`EncodedBloomFilter::encode_to_vec`].
    ///
    /// # Errors
    ///
    /// Returns [`DecodeAffinityFilterError`] if the input is shorter than the
    /// fixed header, the compact length prefix is malformed, or the declared
    /// word count does not match the remaining payload exactly.
    fn decode(data: &[u8]) -> Result<Self, DecodeAffinityFilterError> {
        if data.len() < 20 {
            return Err(DecodeAffinityFilterError);
        }
        let seed = u128::from_le_bytes(<[u8; 16]>::try_from(&data[..16]).unwrap());
        let num_hashes = u32::from_le_bytes(<[u8; 4]>::try_from(&data[16..20]).unwrap());
        let rest = &data[20..];
        let (rest, bits_len) =
            crate::util::nom_scale_compact_usize::<nom::error::Error<&[u8]>>(rest)
                .map_err(|_| DecodeAffinityFilterError)?;
        // `bits_len` is attacker-controlled. `checked_mul` prevents the length
        // check from being bypassed through wrap-around (e.g. `bits_len = 2^61`
        // on 64-bit targets makes `bits_len * 8` wrap to 0 in release builds,
        // which would let a huge `with_capacity` allocation through below).
        if bits_len.checked_mul(8) != Some(rest.len()) {
            return Err(DecodeAffinityFilterError);
        }
        // At this point `bits_len` is bounded by the input size, so the
        // allocation is safe.
        let mut bits = Vec::with_capacity(bits_len);
        for chunk in rest.chunks_exact(8) {
            bits.push(u64::from_le_bytes(<[u8; 8]>::try_from(chunk).unwrap()));
        }
        Ok(EncodedBloomFilter {
            seed,
            num_hashes,
            bits,
        })
    }
}

/// Bloom filter advertising the set of statement topics a peer is interested in.
///
/// Serialized with [`AffinityFilter::encode_to_vec`] and reconstructed from the
/// wire format with [`AffinityFilter::decode`].
#[derive(Debug, Clone)]
pub struct AffinityFilter {
    /// Bloom filter bytes representing the topics this peer is interested in.
    bloom: fastbloom::BloomFilter<PortableBuildHasher>,
    /// Seed used for hashing items in the bloom filter.
    seed: u128,
}

impl AffinityFilter {
    /// Creates an empty filter tuned for `expected_items` entries at the given
    /// target false-positive rate, hashing with the provided seed.
    pub fn new(seed: u128, false_pos: f64, expected_items: usize) -> Self {
        let hasher = PortableBuildHasher::seeded(seed);
        let bloom = fastbloom::BloomFilter::with_false_pos(false_pos)
            .hasher(hasher)
            .expected_items(expected_items);
        AffinityFilter { bloom, seed }
    }

    /// Builds a filter pre-populated with every topic yielded by the iterator.
    ///
    /// The filter is sized for the number of topics collected, with a floor of
    /// 1 so that an empty iterator still produces a valid filter.
    pub fn from_topics<'a>(
        topics: impl Iterator<Item = &'a [u8; 32]>,
        seed: u128,
        false_positive_rate: f64,
    ) -> Self {
        let collected: Vec<&[u8; 32]> = topics.collect();
        let expected = collected.len().max(1);
        let mut filter = Self::new(seed, false_positive_rate, expected);
        collected.into_iter().for_each(|topic| filter.insert(topic));
        filter
    }

    /// Reconstructs a filter from its wire encoding, rejecting inputs whose
    /// bit count or hash-function count fall outside the accepted bounds.
    pub fn decode(data: &[u8]) -> Result<Self, DecodeAffinityFilterError> {
        let EncodedBloomFilter {
            seed,
            num_hashes,
            bits,
        } = EncodedBloomFilter::decode(data)?;
        let size_ok = !bits.is_empty() && bits.len() * u64::BITS as usize <= MAX_BLOOM_BITS;
        let hashes_ok = (1..=MAX_NUM_HASHES).contains(&num_hashes);
        if !size_ok || !hashes_ok {
            return Err(DecodeAffinityFilterError);
        }
        let bloom = fastbloom::BloomFilter::from_vec(bits)
            .hasher(PortableBuildHasher::seeded(seed))
            .hashes(num_hashes);
        Ok(AffinityFilter { bloom, seed })
    }

    /// Insert a topic into the bloom filter.
    pub fn insert(&mut self, topic: &[u8; 32]) {
        self.bloom.insert(topic);
    }

    /// Check if a topic is likely present in the bloom filter.
    pub fn contains(&self, topic: &[u8; 32]) -> bool {
        self.bloom.contains(topic)
    }

    /// Check if topics match this affinity filter.
    ///
    /// Topics match if any of them is present in the bloom filter.
    /// No topics always match (from broadcast statements).
    pub fn matches_statement(&self, topics: &[&Topic]) -> bool {
        topics.is_empty() || topics.iter().any(|topic| self.bloom.contains(topic))
    }

    /// Serializes the filter into the wire format accepted by
    /// [`AffinityFilter::decode`].
    pub fn encode_to_vec(&self) -> Vec<u8> {
        let num_hashes = self.bloom.num_hashes();
        debug_assert!((1..=MAX_NUM_HASHES).contains(&num_hashes));
        EncodedBloomFilter {
            seed: self.seed,
            num_hashes,
            bits: self.bloom.as_slice().to_vec(),
        }
        .encode_to_vec()
    }

    /// Returns a filter that reports every topic as present (all bits set,
    /// one hash function).
    pub fn match_all(seed: u128) -> Self {
        let all_ones = alloc::vec![u64::MAX; 16];
        let bloom = fastbloom::BloomFilter::from_vec(all_ones)
            .hasher(PortableBuildHasher::seeded(seed))
            .hashes(1);
        AffinityFilter { bloom, seed }
    }
}

/// Error returned by [`AffinityFilter::decode`] when the input bytes are not a
/// valid bloom filter encoding or exceed the accepted size/hash-count bounds.
#[derive(Debug, derive_more::Display, Clone)]
#[display("Invalid bloom filter encoding")]
pub struct DecodeAffinityFilterError;

#[cfg(test)]
mod tests {
    use super::*;

    /// Target false-positive rate used by every test filter.
    const BLOOM_FALSE_POS_RATE: f64 = 0.01;
    /// Arbitrary fixed seed so results are reproducible across runs.
    const TEST_SEED: u128 = 0x5EED_5EED_5EED_5EED;

    /// Maximum number of `u64` words a valid encoded filter may contain.
    const MAX_BLOOM_WORDS: usize = MAX_BLOOM_BITS / u64::BITS as usize;

    // The smallest filter (1 expected item) produces the largest hash count;
    // it must still fit in the 1..=MAX_NUM_HASHES range enforced by `decode`.
    #[test]
    fn num_hashes_is_within_substrate_limit() {
        let mut filter = AffinityFilter::new(TEST_SEED, BLOOM_FALSE_POS_RATE, 1);
        filter.insert(&[0xAA; 32]);
        let encoded = filter.encode_to_vec();
        // `num_hashes` occupies bytes 16..20 of the wire format (after the seed).
        let num_hashes = u32::from_le_bytes(<[u8; 4]>::try_from(&encoded[16..20]).unwrap());
        assert!(
            (1..=MAX_NUM_HASHES).contains(&num_hashes),
            "num_hashes {num_hashes} out of allowed range 1..={MAX_NUM_HASHES}"
        );
    }

    // A filter with zero words carries no information and must be rejected.
    #[test]
    fn decode_rejects_empty_bits() {
        let encoded = EncodedBloomFilter {
            seed: TEST_SEED,
            num_hashes: 7,
            bits: vec![],
        };
        let bytes = encoded.encode_to_vec();
        assert!(AffinityFilter::decode(&bytes).is_err());
    }

    // One word past the MAX_BLOOM_BITS budget must be rejected.
    #[test]
    fn decode_rejects_oversized_bits() {
        let encoded = EncodedBloomFilter {
            seed: TEST_SEED,
            num_hashes: 7,
            bits: vec![0u64; MAX_BLOOM_WORDS + 1],
        };
        let bytes = encoded.encode_to_vec();
        assert!(AffinityFilter::decode(&bytes).is_err());
    }

    // Zero hash functions would make the filter degenerate; must be rejected.
    #[test]
    fn decode_rejects_zero_num_hashes() {
        let encoded = EncodedBloomFilter {
            seed: TEST_SEED,
            num_hashes: 0,
            bits: vec![0u64; 16],
        };
        let bytes = encoded.encode_to_vec();
        assert!(AffinityFilter::decode(&bytes).is_err());
    }

    // A hash count above MAX_NUM_HASHES is a CPU-abuse vector; must be rejected.
    #[test]
    fn decode_rejects_excessive_num_hashes() {
        let encoded = EncodedBloomFilter {
            seed: TEST_SEED,
            num_hashes: u32::MAX,
            bits: vec![0u64; 16],
        };
        let bytes = encoded.encode_to_vec();
        assert!(AffinityFilter::decode(&bytes).is_err());
    }

    // The extreme-but-valid corner (max words, max hashes) must be accepted.
    #[test]
    fn decode_accepts_valid_bounds() {
        let encoded = EncodedBloomFilter {
            seed: TEST_SEED,
            num_hashes: MAX_NUM_HASHES,
            bits: vec![0u64; MAX_BLOOM_WORDS],
        };
        let bytes = encoded.encode_to_vec();
        assert!(AffinityFilter::decode(&bytes).is_ok());
    }

    // Encode/decode round-trip at scale: membership answers (including false
    // positives) must be bit-for-bit identical before and after the trip, and
    // re-encoding must reproduce the exact same bytes.
    #[test]
    fn large_roundtrip() {
        const TOTAL: usize = 100_000;
        const SET_COUNT: usize = TOTAL / 10;

        let items: Vec<[u8; 32]> = (0..TOTAL)
            .map(|i| {
                let mut key = [0u8; 32];
                key[..8].copy_from_slice(&(i as u64).to_le_bytes());
                key
            })
            .collect();

        let mut filter = AffinityFilter::new(TEST_SEED, BLOOM_FALSE_POS_RATE, SET_COUNT);
        for item in &items[..SET_COUNT] {
            filter.insert(item);
        }

        let expected: Vec<bool> = items.iter().map(|item| filter.contains(item)).collect();
        // Bloom filters have no false negatives: every inserted item must match.
        for i in 0..SET_COUNT {
            assert!(expected[i], "inserted item {i} must be present");
        }

        let encoded = filter.encode_to_vec();
        let decoded = AffinityFilter::decode(&encoded).expect("decoding should succeed");

        for (i, item) in items.iter().enumerate() {
            assert_eq!(decoded.contains(item), expected[i], "mismatch for item {i}");
        }

        assert_eq!(
            encoded,
            decoded.encode_to_vec(),
            "re-encoding should produce identical bytes"
        );
    }

    // Pins the exact wire bytes (via a blake2_256 digest) so that any change to
    // the hashing or encoding scheme — which would break interop with peers —
    // fails loudly. The digest is the cross-implementation snapshot.
    #[test]
    fn encoding_snapshot() {
        const ITEM_COUNT: usize = 10_000;

        let items: Vec<[u8; 32]> = (0..ITEM_COUNT)
            .map(|i| {
                let mut key = [0u8; 32];
                key[..8].copy_from_slice(&(i as u64).to_le_bytes());
                key
            })
            .collect();

        let mut filter = AffinityFilter::new(TEST_SEED, BLOOM_FALSE_POS_RATE, ITEM_COUNT);
        for item in &items {
            filter.insert(item);
        }

        let encoded = filter.encode_to_vec();

        let digest: [u8; 32] = blake2_rfc::blake2b::blake2b(32, &[], &encoded)
            .as_bytes()
            .try_into()
            .unwrap();
        assert_eq!(
            digest,
            [
                180, 34, 58, 78, 198, 24, 137, 83, 154, 127, 9, 152, 171, 50, 197, 27, 242, 158,
                30, 79, 143, 192, 53, 151, 174, 106, 132, 105, 20, 145, 133, 0
            ],
            "blake2_256 digest must match polkadot-sdk snapshot"
        );

        let decoded = AffinityFilter::decode(&encoded).expect("snapshot must decode");
        for (i, item) in items.iter().enumerate() {
            assert!(
                decoded.contains(item),
                "item {i} must be present after decoding"
            );
        }

        // Sanity-check that the decoded filter is not degenerate (all-matching).
        let absent: [u8; 32] = [0xFF; 32];
        assert!(!decoded.contains(&absent));
    }

    // An empty topic list means a broadcast statement: always matches.
    #[test]
    fn matches_empty_topics_is_broadcast() {
        let filter = AffinityFilter::new(TEST_SEED, BLOOM_FALSE_POS_RATE, 1);
        assert!(filter.matches_statement(&[]));
    }

    #[test]
    fn matches_inserted_topic() {
        let topic: Topic = [0xAA; 32];
        let mut filter = AffinityFilter::new(TEST_SEED, BLOOM_FALSE_POS_RATE, 1);
        filter.insert(&topic);
        assert!(filter.matches_statement(&[&topic]));
    }

    #[test]
    fn no_match_missing_topic() {
        let inserted: Topic = [0xAA; 32];
        let missing: Topic = [0xBB; 32];
        let mut filter = AffinityFilter::new(TEST_SEED, BLOOM_FALSE_POS_RATE, 1);
        filter.insert(&inserted);
        assert!(!filter.matches_statement(&[&missing]));
    }

    // Matching is an "any" over the topic list, not an "all".
    #[test]
    fn matches_any_topic() {
        let inserted: Topic = [0xAA; 32];
        let missing: Topic = [0xBB; 32];
        let mut filter = AffinityFilter::new(TEST_SEED, BLOOM_FALSE_POS_RATE, 2);
        filter.insert(&inserted);
        assert!(filter.matches_statement(&[&missing, &inserted]));
    }
}