freenet 0.2.38

Freenet core software
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
//! Bloom filter for tracking visited peers during GET/SUBSCRIBE operations.
//!
//! This module provides a space-efficient probabilistic data structure for tracking
//! which peers have been visited during contract search operations. Using a bloom filter
//! instead of a HashSet provides:
//!
//! - Fixed 64-byte size regardless of visited peer count
//! - Privacy protection via transaction-specific hashing
//! - Efficient serialization for network transmission

use std::net::SocketAddr;

use ahash::RandomState;
use serde::{Deserialize, Serialize};
use serde_with::serde_as;

use crate::message::Transaction;

/// Number of bits in the bloom filter (512 bits = 64 bytes).
const BLOOM_BITS: usize = 512;

/// Number of bytes in the bloom filter.
const BLOOM_BYTES: usize = BLOOM_BITS / 8;

/// Number of hash functions to use (double-hashing generates 4 from 2).
const NUM_HASHES: usize = 4;

/// A bloom filter for tracking visited peers during contract search operations.
///
/// Uses the transaction ID as a hash seed to prevent topology inference attacks.
/// An observer cannot correlate visited sets across different transactions.
///
/// # False Positive Rates (k=4 hash functions, m=512 bits)
/// - 10 peers: ~0.006% (1 in 17,000)
/// - 20 peers: ~0.04% (1 in 2,500)
/// - 30 peers: ~0.2% (1 in 500)
#[serde_as]
#[derive(Clone, Serialize, Deserialize)]
pub struct VisitedPeers {
    /// The bloom filter bits (64 bytes = 512 bits).
    #[serde_as(as = "[_; BLOOM_BYTES]")]
    bits: [u8; BLOOM_BYTES],
    /// Hash keys derived from transaction ID for privacy.
    /// Skipped during serialization - receiver reconstructs from transaction ID.
    #[serde(skip)]
    hash_keys: (u64, u64),
}

impl std::fmt::Debug for VisitedPeers {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let set_bits = self.bits.iter().map(|b| b.count_ones()).sum::<u32>();
        f.debug_struct("VisitedPeers")
            .field("set_bits", &set_bits)
            .field("total_bits", &BLOOM_BITS)
            .finish()
    }
}

impl VisitedPeers {
    /// Creates a new empty bloom filter using the transaction ID as hash key.
    ///
    /// The transaction ID is used to derive hash keys, ensuring that:
    /// - The same peer address produces different hashes in different transactions
    /// - An observer cannot correlate visited sets across transactions
    pub fn new(tx: &Transaction) -> Self {
        let tx_bytes = tx.id_bytes();
        Self {
            bits: [0u8; BLOOM_BYTES],
            hash_keys: Self::derive_hash_keys(&tx_bytes),
        }
    }

    /// Sets the hash keys from a transaction after deserialization.
    ///
    /// When a VisitedPeers is deserialized, hash_keys are zeroed. This method
    /// must be called to restore them from the transaction ID before using
    /// `mark_visited` or `probably_visited`.
    pub fn with_transaction(mut self, tx: &Transaction) -> Self {
        let tx_bytes = tx.id_bytes();
        self.hash_keys = Self::derive_hash_keys(&tx_bytes);
        self
    }

    /// Derives hash keys from transaction bytes.
    fn derive_hash_keys(tx_bytes: &[u8; 16]) -> (u64, u64) {
        let key0 = u64::from_le_bytes([
            tx_bytes[0],
            tx_bytes[1],
            tx_bytes[2],
            tx_bytes[3],
            tx_bytes[4],
            tx_bytes[5],
            tx_bytes[6],
            tx_bytes[7],
        ]);
        let key1 = u64::from_le_bytes([
            tx_bytes[8],
            tx_bytes[9],
            tx_bytes[10],
            tx_bytes[11],
            tx_bytes[12],
            tx_bytes[13],
            tx_bytes[14],
            tx_bytes[15],
        ]);
        (key0, key1)
    }

    /// Marks a peer address as visited.
    pub fn mark_visited(&mut self, addr: SocketAddr) {
        for idx in self.hash_indices(&addr) {
            let byte_idx = idx / 8;
            let bit_idx = idx % 8;
            self.bits[byte_idx] |= 1 << bit_idx;
        }
    }

    /// Checks if a peer address has probably been visited.
    ///
    /// Returns `true` if the address is probably in the set (may have false positives),
    /// or `false` if the address is definitely not in the set (no false negatives).
    pub fn probably_visited(&self, addr: SocketAddr) -> bool {
        for idx in self.hash_indices(&addr) {
            let byte_idx = idx / 8;
            let bit_idx = idx % 8;
            if self.bits[byte_idx] & (1 << bit_idx) == 0 {
                return false;
            }
        }
        true
    }

    /// Generates bloom filter indices using double-hashing.
    ///
    /// Uses the technique from "Less Hashing, Same Performance: Building a Better
    /// Bloom Filter" (Kirsch & Mitzenmacher, 2006). Double-hashing computes h1 and h2,
    /// then generates k hashes as h1 + i*h2 for i in 0..k.
    /// This is more efficient than computing k independent hashes and provides
    /// equivalent probabilistic guarantees.
    fn hash_indices(&self, addr: &SocketAddr) -> [usize; NUM_HASHES] {
        // Use two independent hash functions with different seeds.
        // The constants are derived from the golden ratio (phi) and are commonly
        // used in hash function mixing to ensure good bit distribution.
        // 0x9e3779b97f4a7c15 = 2^64 / phi (golden ratio constant)
        // 0x517cc1b727220a95 = another well-distributed odd constant
        let state1 = RandomState::with_seeds(self.hash_keys.0, self.hash_keys.1, 0, 0);

        let state2 = RandomState::with_seeds(
            self.hash_keys.0.wrapping_add(0x9e3779b97f4a7c15),
            self.hash_keys.1.wrapping_add(0x517cc1b727220a95),
            0,
            0,
        );

        // Compute two independent base hashes
        let h1 = state1.hash_one(addr);
        let h2 = state2.hash_one(addr);

        // Generate NUM_HASHES indices using double-hashing: h1 + i*h2
        [
            (h1 as usize) % BLOOM_BITS,
            (h1.wrapping_add(h2) as usize) % BLOOM_BITS,
            (h1.wrapping_add(h2.wrapping_mul(2)) as usize) % BLOOM_BITS,
            (h1.wrapping_add(h2.wrapping_mul(3)) as usize) % BLOOM_BITS,
        ]
    }
}

/// Default implementation creates an empty bloom filter with zero hash keys.
/// The bloom filter won't work correctly until hash keys are set via `with_transaction()`.
/// This is useful for tests or when the bloom filter will be restored after deserialization.
impl Default for VisitedPeers {
    fn default() -> Self {
        Self {
            bits: [0u8; BLOOM_BYTES],
            hash_keys: (0, 0),
        }
    }
}

/// Implement Contains trait for use with k_closest_potentially_hosting.
impl crate::util::Contains<SocketAddr> for VisitedPeers {
    fn has_element(&self, target: SocketAddr) -> bool {
        self.probably_visited(target)
    }
}

/// Also implement for references.
impl crate::util::Contains<SocketAddr> for &VisitedPeers {
    fn has_element(&self, target: SocketAddr) -> bool {
        self.probably_visited(target)
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::message::Transaction;
    use crate::operations::get::GetMsg;

    fn test_transaction() -> Transaction {
        Transaction::new::<GetMsg>()
    }

    #[test]
    fn test_basic_operations() {
        let tx = test_transaction();
        let mut visited = VisitedPeers::new(&tx);
        let addr: SocketAddr = "127.0.0.1:8000".parse().unwrap();

        // Initially not visited
        assert!(!visited.probably_visited(addr));

        // After marking, should be visited
        visited.mark_visited(addr);
        assert!(visited.probably_visited(addr));

        // Different address should not be visited
        let other_addr: SocketAddr = "127.0.0.1:9000".parse().unwrap();
        assert!(!visited.probably_visited(other_addr));
    }

    #[test]
    fn test_multiple_addresses() {
        let tx = test_transaction();
        let mut visited = VisitedPeers::new(&tx);

        let addrs: Vec<SocketAddr> = (8000..8020)
            .map(|port| format!("127.0.0.1:{}", port).parse().unwrap())
            .collect();

        // Mark all addresses
        for addr in &addrs {
            visited.mark_visited(*addr);
        }

        // All should be probably visited
        for addr in &addrs {
            assert!(
                visited.probably_visited(*addr),
                "Address {} should be visited",
                addr
            );
        }
    }

    #[test]
    fn test_transaction_isolation() {
        let tx1 = test_transaction();
        let tx2 = test_transaction();

        let v1 = VisitedPeers::new(&tx1);
        let v2 = VisitedPeers::new(&tx2);

        // Different transactions MUST use different hash keys (deterministic property)
        assert_ne!(
            v1.hash_keys, v2.hash_keys,
            "Different transactions must produce different hash keys"
        );

        // Same transaction should produce identical hash keys (deterministic)
        let v1_again = VisitedPeers::new(&tx1);
        assert_eq!(
            v1.hash_keys, v1_again.hash_keys,
            "Same transaction must produce identical hash keys"
        );
    }

    #[test]
    fn test_serialization_roundtrip() {
        let tx = test_transaction();
        let mut visited = VisitedPeers::new(&tx);

        let addrs: Vec<SocketAddr> = vec![
            "127.0.0.1:8000".parse().unwrap(),
            "192.168.1.1:9000".parse().unwrap(),
            "[::1]:8080".parse().unwrap(),
        ];

        for addr in &addrs {
            visited.mark_visited(*addr);
        }

        // Serialize and deserialize
        let bytes = bincode::serialize(&visited).expect("serialization failed");
        let deserialized: VisitedPeers =
            bincode::deserialize(&bytes).expect("deserialization failed");

        // Restore hash keys from transaction (required after deserialization)
        let deserialized = deserialized.with_transaction(&tx);

        // All marked addresses should still be visited
        for addr in &addrs {
            assert!(
                deserialized.probably_visited(*addr),
                "Address {} should be visited after roundtrip",
                addr
            );
        }
    }

    #[test]
    fn test_size_is_fixed() {
        let tx = test_transaction();
        let mut visited = VisitedPeers::new(&tx);

        // Size should be fixed regardless of how many addresses we add
        let initial_size = std::mem::size_of_val(&visited.bits);
        assert_eq!(initial_size, BLOOM_BYTES);

        for port in 8000..8100 {
            let addr: SocketAddr = format!("127.0.0.1:{}", port).parse().unwrap();
            visited.mark_visited(addr);
        }

        let final_size = std::mem::size_of_val(&visited.bits);
        assert_eq!(final_size, BLOOM_BYTES);
    }

    #[test]
    fn test_false_positive_rate() {
        let tx = test_transaction();
        let mut visited = VisitedPeers::new(&tx);

        // Insert 20 addresses (simulating HTL=20 worst case)
        let inserted_addrs: Vec<SocketAddr> = (8000..8020)
            .map(|port| format!("127.0.0.1:{}", port).parse().unwrap())
            .collect();

        for addr in &inserted_addrs {
            visited.mark_visited(*addr);
        }

        // Check 10,000 random addresses that were NOT inserted
        // Using more samples for statistical significance
        let mut false_positives = 0;
        for port in 10000..20000 {
            let addr: SocketAddr = format!("10.0.0.1:{}", port).parse().unwrap();
            if visited.probably_visited(addr) {
                false_positives += 1;
            }
        }

        // With 20 elements in 512-bit filter with k=4, expected FP rate is ~0.04%
        // Allow up to 0.5% (50 out of 10,000) for test stability - still 12x expected rate
        let fp_rate = false_positives as f64 / 10000.0 * 100.0;
        assert!(
            false_positives <= 50,
            "False positive rate too high: {}/10000 = {:.3}% (expected ~0.04%)",
            false_positives,
            fp_rate
        );
    }

    #[test]
    fn test_no_false_negatives() {
        let tx = test_transaction();
        let mut visited = VisitedPeers::new(&tx);

        // Insert many addresses
        let addrs: Vec<SocketAddr> = (8000..8050)
            .map(|port| format!("127.0.0.1:{}", port).parse().unwrap())
            .collect();

        for addr in &addrs {
            visited.mark_visited(*addr);
        }

        // Every inserted address MUST be detected (no false negatives)
        for addr in &addrs {
            assert!(
                visited.probably_visited(*addr),
                "False negative detected for {}",
                addr
            );
        }
    }

    #[test]
    fn test_serialized_size() {
        let tx = test_transaction();
        let visited = VisitedPeers::new(&tx);

        // Serialize with bincode
        let bytes = bincode::serialize(&visited).expect("serialization failed");

        // Expected size: 64 bytes (bits only, hash_keys are skipped)
        // bincode adds a small length prefix for the array
        assert_eq!(
            bytes.len(),
            64,
            "Serialized size should be exactly 64 bytes, got {}",
            bytes.len()
        );

        // Verify the size is fixed regardless of content
        let mut visited_full = VisitedPeers::new(&tx);
        for port in 8000..8100 {
            let addr: SocketAddr = format!("127.0.0.1:{}", port).parse().unwrap();
            visited_full.mark_visited(addr);
        }
        let bytes_full = bincode::serialize(&visited_full).expect("serialization failed");
        assert_eq!(
            bytes.len(),
            bytes_full.len(),
            "Serialized size should be fixed regardless of marked peers"
        );
    }

    /// Test documenting the correct behavior for request forwarding.
    ///
    /// When a node receives a forwarded GET/SUBSCRIBE request, it MUST mark BOTH:
    /// 1. Its own address (this_peer) - so requests won't be routed back to it
    /// 2. The sender's address (source_addr) - so requests won't cycle back to the sender
    ///
    /// This is critical for preventing request cycles, especially in scenarios where
    /// a node has limited connections (e.g., only connected to a gateway).
    ///
    /// Bug reproduction scenario (before fix):
    /// 1. Node A (originator, single connection to gateway) sends GET request
    /// 2. Gateway marks itself in visited, forwards to other peers
    /// 3. Other peers mark themselves, but NOT the sender
    /// 4. Eventually, some peer routes back to Node A (not in visited filter)
    /// 5. Node A receives its own request, can't forward (only peer is gateway, already in filter)
    /// 6. Request fails with "no upstream" error
    #[test]
    fn test_forwarding_must_mark_both_this_peer_and_sender() {
        let tx = test_transaction();
        let mut visited = VisitedPeers::new(&tx);

        // Simulate the forwarding scenario
        let this_peer: SocketAddr = "10.0.0.1:8000".parse().unwrap();
        let sender: SocketAddr = "10.0.0.2:8000".parse().unwrap();
        let originator: SocketAddr = "10.0.0.3:8000".parse().unwrap();

        // When processing a forwarded request, mark BOTH this_peer AND sender
        visited.mark_visited(this_peer);
        visited.mark_visited(sender);

        // Both should be marked to prevent routing back to them
        assert!(
            visited.probably_visited(this_peer),
            "this_peer must be marked to prevent routing back to processing node"
        );
        assert!(
            visited.probably_visited(sender),
            "sender must be marked to prevent routing back to request source"
        );

        // The originator is still valid for routing (until they receive and mark themselves)
        // This documents that the originator should also be marked at the START of the operation
        assert!(
            !visited.probably_visited(originator),
            "originator not marked yet - they should mark themselves when initiating"
        );
    }

    /// Test that simulates the full routing cycle prevention.
    ///
    /// This tests the complete chain of marking that should prevent request cycles:
    /// Originator -> Gateway -> Peer1 -> Peer2 -> ... -> back to originator (should be blocked)
    #[test]
    fn test_request_cycle_prevention() {
        let tx = test_transaction();
        let mut visited = VisitedPeers::new(&tx);

        // Simulate the routing chain:
        // Originator (A) -> Gateway (G) -> Peer1 (P1) -> Peer2 (P2)
        let originator: SocketAddr = "10.0.0.1:8000".parse().unwrap();
        let gateway: SocketAddr = "10.0.0.2:8000".parse().unwrap();
        let peer1: SocketAddr = "10.0.0.3:8000".parse().unwrap();
        let peer2: SocketAddr = "10.0.0.4:8000".parse().unwrap();

        // BUG CONTEXT: In the original bug, only this_peer was marked at each hop,
        // NOT the sender. This meant requests could cycle back to earlier nodes.
        // The fix ensures each node marks BOTH itself AND its sender.

        // Step 1: Gateway receives from originator
        // Gateway marks: itself (gateway) AND sender (originator)
        visited.mark_visited(gateway);
        visited.mark_visited(originator);

        // Step 2: Peer1 receives from gateway
        // Peer1 marks: itself (peer1) AND sender (gateway)
        visited.mark_visited(peer1);
        visited.mark_visited(gateway); // Already marked, but this is what the code does

        // Step 3: Peer2 receives from peer1
        // Peer2 marks: itself (peer2) AND sender (peer1)
        visited.mark_visited(peer2);
        visited.mark_visited(peer1); // Already marked, but this is what the code does

        // Now, all nodes in the chain should be marked
        assert!(
            visited.probably_visited(originator),
            "originator must be blocked"
        );
        assert!(visited.probably_visited(gateway), "gateway must be blocked");
        assert!(visited.probably_visited(peer1), "peer1 must be blocked");
        assert!(visited.probably_visited(peer2), "peer2 must be blocked");

        // A new peer (peer3) can still be routed to
        let peer3: SocketAddr = "10.0.0.5:8000".parse().unwrap();
        assert!(
            !visited.probably_visited(peer3),
            "unvisited peer should still be valid for routing"
        );
    }
}