// freenet 0.2.27 — Freenet core software (source listing taken from the rendered documentation)

//! Exponential backoff for failed connection attempts by location.
//!
//! When connection attempts fail (either due to routing failure or connect operation failure),
//! we apply exponential backoff before retrying to avoid spamming small/saturated networks.
//!
//! This module uses location buckets to group nearby locations together, reducing memory
//! usage while still providing effective backoff for clustered targets.

use crate::util::backoff::{ExponentialBackoff, TrackedBackoff};
use std::time::Duration;

use super::Location;

/// Types of connection failures for backoff calculation.
///
/// The reason determines how heavily a failure is weighted by
/// `ConnectionBackoff::record_failure_with_reason`. At present only `Timeout` is
/// weighted differently (it is recorded as two failures); every other reason is
/// recorded as a single failure.
///
/// Active variants:
/// - `RoutingFailed`: No peer available to route through (normal exponential backoff)
/// - `Timeout`: Operation exceeded TTL (escalated backoff)
/// - `Rejected`: Peer explicitly rejected connection at capacity (normal backoff)
///
/// # Future Variants
///
/// The following variants are defined for future protocol enhancements:
/// - `NatPunchFailed`: NAT hole punch failed (transient)
/// - `HandshakeError`: Handshake failure after connection established
/// - `TransientError`: Transient network issues
///
/// See <https://github.com/freenet/freenet-core/issues/2663> for discussion.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ConnectionFailureReason {
    /// Routing failed - no peer available to route through
    RoutingFailed,
    /// Connect operation timed out waiting for response
    Timeout,
    /// Remote peer explicitly rejected connection (at capacity, etc.)
    Rejected,
    /// NAT hole punch failed
    // Reserved for future use (see "Future Variants" above); the lint is silenced
    // because, presumably, nothing outside the tests constructs it yet.
    #[allow(dead_code)]
    NatPunchFailed,
    /// Handshake error after connection established
    // Reserved for future use (see "Future Variants" above).
    #[allow(dead_code)]
    HandshakeError,
    /// Transient error (network issue, retry soon)
    // Reserved for future use (see "Future Variants" above). Currently weighted
    // the same as a normal failure when recorded.
    #[allow(dead_code)]
    TransientError,
}

/// Bucket for location - we group nearby locations to avoid tracking too many entries.
/// Uses 256 equal-width buckets across the [0, 1] ring; the wrapped `u8` is the
/// bucket index computed by `LocationBucket::from_location`.
///
/// Note: This intentionally groups nearby locations together. If a connection to one
/// location in a bucket fails, we'll delay retrying all locations in that bucket.
/// This is a tradeoff: it reduces memory usage and prevents rapid retries to
/// clustered locations, but may delay legitimate connections to nearby peers.
/// With 256 buckets, each covers 1/256 (~0.4%) of the ring.
///
/// Because the index is a `u8`, at most 256 distinct buckets can ever exist.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
struct LocationBucket(u8);

impl LocationBucket {
    fn from_location(loc: Location) -> Self {
        // Location is in [0, 1], multiply by 256 and clamp to handle edge case at 1.0
        let bucket = (loc.as_f64() * 256.0).min(255.0) as u8;
        Self(bucket)
    }
}

/// Tracks backoff state for failed connection targets by location.
///
/// Uses exponential backoff: `base_interval * 2^(consecutive_failures-1)` capped at `max_backoff`.
/// First failure = base_interval, second = 2x, third = 4x, etc.
///
/// Locations are grouped into 256 buckets to reduce memory usage while still providing
/// effective backoff for nearby targets.
///
/// # Failure Escalation
///
/// Timeout failures are escalated (each one is recorded as 2 failures) while every other
/// failure reason is recorded normally (1 failure). Because the delay depends on the
/// accumulated failure count, this creates different backoff curves (multiples of
/// `base_interval`, before the `max_backoff` cap and with no success in between):
/// - **Timeout**: 1st → 2x backoff (2 failures), 2nd → 8x (4 failures), 3rd → 32x (6 failures)
/// - **Routing**: 1st → 1x backoff, 2nd → 2x, 3rd → 4x (normal exponential)
///
/// Timeouts suggest the peer is overloaded, unresponsive, or unreachable, warranting
/// more aggressive backoff. Routing failures indicate no peer was available in that
/// direction and are less severe.
#[derive(Debug)]
pub struct ConnectionBackoff {
    // Generic per-key tracker, keyed by location bucket; this wrapper holds no other state.
    inner: TrackedBackoff<LocationBucket>,
}

impl Default for ConnectionBackoff {
    fn default() -> Self {
        Self::new()
    }
}

impl ConnectionBackoff {
    /// Base interval used by [`ConnectionBackoff::new`]: 30 seconds.
    ///
    /// Connect requests arrive roughly once every 60 seconds (the operation timeout
    /// interval), so a 30-second base means even a first failure already suppresses
    /// about half of the follow-up attempts. See issue #2595 for context.
    const DEFAULT_BASE_INTERVAL: Duration = Duration::from_secs(30);

    /// Upper bound on the backoff delay used by [`ConnectionBackoff::new`]: 10 minutes.
    ///
    /// Starting from the 30s base the delay grows 30s → 60s → 120s → 240s → 480s and
    /// is then held at 600s, so persistently failing targets are quickly left alone
    /// for a meaningful time. See issue #2595.
    const DEFAULT_MAX_BACKOFF: Duration = Duration::from_secs(10 * 60);

    /// Maximum number of buckets tracked by [`ConnectionBackoff::new`].
    const DEFAULT_MAX_ENTRIES: usize = 256;

    /// Number of failures recorded for a single timeout.
    ///
    /// Recording a timeout more than once advances the exponential curve faster than
    /// an ordinary failure does. With a value of 2, a first timeout lands on the delay
    /// that would otherwise take two consecutive failures to reach, so unresponsive
    /// peers are backed off harder than peers that merely reject due to capacity.
    const TIMEOUT_FAILURE_ESCALATION: u32 = 2;

    /// Builds a tracker that uses the default base interval, cap and capacity.
    pub fn new() -> Self {
        Self {
            inner: TrackedBackoff::new(
                ExponentialBackoff::new(Self::DEFAULT_BASE_INTERVAL, Self::DEFAULT_MAX_BACKOFF),
                Self::DEFAULT_MAX_ENTRIES,
            ),
        }
    }

    /// Builds a tracker with caller-supplied intervals and capacity (test builds only).
    #[cfg(test)]
    pub fn with_config(base_interval: Duration, max_backoff: Duration, max_entries: usize) -> Self {
        let inner = TrackedBackoff::new(
            ExponentialBackoff::new(base_interval, max_backoff),
            max_entries,
        );
        Self { inner }
    }

    /// Reports whether the bucket containing `target` is currently backing off.
    ///
    /// `true` means the caller should skip this target for now; `false` means a
    /// connection attempt may proceed.
    pub fn is_in_backoff(&self, target: Location) -> bool {
        self.inner
            .is_in_backoff(&LocationBucket::from_location(target))
    }

    /// Records a plain (non-escalated) failure for `target`.
    ///
    /// Shorthand for [`Self::record_failure_with_reason`] with
    /// [`ConnectionFailureReason::RoutingFailed`].
    #[allow(dead_code)] // Used by tests and as backward-compatible API
    pub fn record_failure(&mut self, target: Location) {
        self.record_failure_with_reason(target, ConnectionFailureReason::RoutingFailed);
    }

    /// Records a failure for `target`, weighted by `reason`.
    ///
    /// The weighting is expressed as the number of failures pushed into the
    /// underlying tracker for the target's bucket:
    /// - `Timeout` records [`Self::TIMEOUT_FAILURE_ESCALATION`] failures, so the
    ///   backoff escalates faster.
    /// - Every other reason, including `TransientError`, records one failure.
    ///
    /// The resulting failure count and delay are emitted as a debug event.
    pub fn record_failure_with_reason(
        &mut self,
        target: Location,
        reason: ConnectionFailureReason,
    ) {
        let bucket = LocationBucket::from_location(target);
        let count_before = self.inner.failure_count(&bucket);

        // The match is kept exhaustive on purpose: adding a variant must force a
        // decision about its weight here.
        let weight = match reason {
            ConnectionFailureReason::Timeout => Self::TIMEOUT_FAILURE_ESCALATION,
            ConnectionFailureReason::TransientError
            | ConnectionFailureReason::RoutingFailed
            | ConnectionFailureReason::Rejected
            | ConnectionFailureReason::NatPunchFailed
            | ConnectionFailureReason::HandshakeError => 1,
        };

        // The tracker only counts one failure per call, so repeat the call.
        for _ in 0..weight {
            self.inner.record_failure(bucket);
        }

        // Report the state the tracker actually ended up in.
        let count_after = self.inner.failure_count(&bucket);
        let delay = self.inner.config().delay_for_failures(count_after);
        tracing::debug!(
            bucket = bucket.0,
            failures_before = count_before,
            failures_after = count_after,
            reason = ?reason,
            backoff_secs = delay.as_secs(),
            "Connection target in backoff (with reason)"
        );
    }

    /// Records a successful connection to `target`, resetting its bucket.
    ///
    /// A debug event is emitted only when the bucket had failures to clear.
    pub fn record_success(&mut self, target: Location) {
        let bucket = LocationBucket::from_location(target);
        let had_failures = self.inner.failure_count(&bucket) > 0;
        if had_failures {
            tracing::debug!(bucket = bucket.0, "Connection target backoff cleared");
        }
        self.inner.record_success(&bucket);
    }

    /// Drops backoff entries that are both expired and stale.
    ///
    /// Delegates to the underlying tracker, which is expected to remove entries
    /// that are past their retry time and have gone longer than `max_backoff`
    /// without a new failure. Intended to be called periodically so the table
    /// does not grow without bound.
    pub fn cleanup_expired(&mut self) {
        self.inner.cleanup_expired();
    }

    /// Forgets every tracked bucket.
    ///
    /// Used during isolation recovery, when the node has had zero connections for
    /// too long and needs to retry aggressively.
    pub fn clear(&mut self) {
        self.inner.clear();
    }

    /// Returns the failure count of the bucket containing `target` (test builds only).
    #[cfg(test)]
    fn failure_count(&self, target: Location) -> u32 {
        self.inner
            .failure_count(&LocationBucket::from_location(target))
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Tracker with a 1s base interval, a 300s cap and room for 256 buckets.
    fn short_interval_backoff() -> ConnectionBackoff {
        ConnectionBackoff::with_config(Duration::from_secs(1), Duration::from_secs(300), 256)
    }

    /// A fresh tracker reports nothing as backing off.
    #[test]
    fn test_backoff_not_in_backoff_initially() {
        let tracker = ConnectionBackoff::new();
        assert!(!tracker.is_in_backoff(Location::new(0.5)));
    }

    /// A single failure puts the target into backoff.
    #[test]
    fn test_backoff_after_failure() {
        let target = Location::new(0.5);
        let mut tracker = ConnectionBackoff::new();

        tracker.record_failure(target);

        assert!(tracker.is_in_backoff(target));
    }

    /// A success after a failure lifts the backoff again.
    #[test]
    fn test_backoff_cleared_on_success() {
        let target = Location::new(0.5);
        let mut tracker = ConnectionBackoff::new();

        tracker.record_failure(target);
        assert!(tracker.is_in_backoff(target));

        tracker.record_success(target);
        assert!(!tracker.is_in_backoff(target));
    }

    /// The delay follows `base * 2^(n-1)`.
    #[test]
    fn test_exponential_backoff_calculation() {
        let policy = ExponentialBackoff::new(Duration::from_secs(1), Duration::from_secs(300));

        // (failure count, expected delay in seconds)
        for (failures, expected_secs) in [(1, 1), (2, 2), (3, 4), (4, 8)] {
            assert_eq!(
                policy.delay_for_failures(failures),
                Duration::from_secs(expected_secs)
            );
        }
    }

    /// Large failure counts are held at the configured maximum.
    #[test]
    fn test_backoff_capped_at_max() {
        let cap = Duration::from_secs(60);
        let policy = ExponentialBackoff::new(Duration::from_secs(10), cap);

        // After many failures, should be capped at 60s
        for failures in [10, 20] {
            assert_eq!(policy.delay_for_failures(failures), cap);
        }
    }

    /// Locations that fall into the same bucket share backoff state.
    #[test]
    fn test_nearby_locations_share_bucket() {
        let mut tracker = ConnectionBackoff::new();

        // 0.500 and 0.501 land in the same bucket
        let failed = Location::new(0.500);
        let neighbour = Location::new(0.501);

        tracker.record_failure(failed);

        // The neighbour is backed off as well because the bucket is shared
        assert!(tracker.is_in_backoff(neighbour));
    }

    /// Locations in different buckets do not affect each other.
    #[test]
    fn test_distant_locations_different_buckets() {
        let mut tracker = ConnectionBackoff::new();

        // 0.1 and 0.9 land in different buckets
        let failed = Location::new(0.1);
        let far_away = Location::new(0.9);

        tracker.record_failure(failed);

        // The distant location must remain available
        assert!(!tracker.is_in_backoff(far_away));
    }

    /// The tracker never holds more entries than its configured maximum.
    #[test]
    fn test_eviction_when_max_entries_exceeded() {
        // Very low max for testing
        let mut tracker =
            ConnectionBackoff::with_config(Duration::from_secs(5), Duration::from_secs(300), 10);

        // Fail twice as many distinct buckets as the tracker may hold
        for slot in 0..20 {
            tracker.record_failure(Location::new(slot as f64 / 256.0));
        }

        // Should have at most max_entries
        assert!(tracker.inner.len() <= 10);
    }

    /// Each plain failure bumps the failure count by one.
    #[test]
    fn test_consecutive_failures_increase_backoff() {
        let target = Location::new(0.5);
        let mut tracker = short_interval_backoff();

        // First failure
        tracker.record_failure(target);
        assert_eq!(tracker.failure_count(target), 1);

        // Second failure
        tracker.record_failure(target);
        assert_eq!(tracker.failure_count(target), 2);
    }

    /// A timeout counts double compared to a routing failure.
    #[test]
    fn test_failure_reason_timeout_escalates_faster() {
        let mut tracker = short_interval_backoff();

        let timed_out = Location::new(0.5);
        let unroutable = Location::new(0.6);

        // One failure each, with different reasons
        tracker.record_failure_with_reason(timed_out, ConnectionFailureReason::Timeout);
        tracker.record_failure_with_reason(unroutable, ConnectionFailureReason::RoutingFailed);

        // Both should be in backoff
        assert!(tracker.is_in_backoff(timed_out));
        assert!(tracker.is_in_backoff(unroutable));

        // Timeout should have recorded 2 failures (escalated) while routing failure recorded 1
        assert_eq!(
            tracker.failure_count(timed_out),
            2,
            "Timeout should escalate to 2 failures"
        );
        assert_eq!(
            tracker.failure_count(unroutable),
            1,
            "Routing failure should stay at 1 failure"
        );

        // Verify that timeout produces longer backoff duration than routing failure
        // timeout with 2 failures: base * 2^(2-1) = 1s * 2 = 2s
        // routing_failed with 1 failure: base * 2^(1-1) = 1s * 1 = 1s
        let policy = tracker.inner.config();
        let timeout_delay = policy.delay_for_failures(2);
        let routing_delay = policy.delay_for_failures(1);
        assert!(
            timeout_delay > routing_delay,
            "Timeout backoff should be longer"
        );
    }

    /// A transient error is recorded as exactly one failure.
    #[test]
    fn test_failure_reason_transient_error_minimal_backoff() {
        let target = Location::new(0.5);
        let mut tracker = short_interval_backoff();

        tracker.record_failure_with_reason(target, ConnectionFailureReason::TransientError);

        // The failure is still recorded, without escalation
        assert_eq!(tracker.failure_count(target), 1);
        // ...and the target is backed off
        assert!(tracker.is_in_backoff(target));
    }

    /// A rejection is recorded as exactly one failure.
    #[test]
    fn test_failure_reason_rejected_normal_backoff() {
        let target = Location::new(0.5);
        let mut tracker = short_interval_backoff();

        tracker.record_failure_with_reason(target, ConnectionFailureReason::Rejected);

        // Should be in backoff with normal escalation
        assert_eq!(tracker.failure_count(target), 1);
        assert!(tracker.is_in_backoff(target));
    }

    /// Every failure reason results in a backoff for its target.
    #[test]
    fn test_all_failure_reasons_recorded() {
        let mut tracker = short_interval_backoff();

        let reasons = [
            ConnectionFailureReason::RoutingFailed,
            ConnectionFailureReason::Timeout,
            ConnectionFailureReason::Rejected,
            ConnectionFailureReason::NatPunchFailed,
            ConnectionFailureReason::HandshakeError,
            ConnectionFailureReason::TransientError,
        ];

        // One distinct bucket per reason
        for (index, &reason) in reasons.iter().enumerate() {
            let target = Location::new(index as f64 / 256.0);
            tracker.record_failure_with_reason(target, reason);
            assert!(
                tracker.is_in_backoff(target),
                "Location should be in backoff after {:?}",
                reason
            );
        }
    }

    /// A success clears the backoff no matter which reason caused it.
    #[test]
    fn test_success_clears_all_failure_reasons() {
        let target = Location::new(0.5);
        let mut tracker = short_interval_backoff();

        // Record failure with reason
        tracker.record_failure_with_reason(target, ConnectionFailureReason::Timeout);
        assert!(tracker.is_in_backoff(target));

        // Success should clear it regardless of reason
        tracker.record_success(target);
        assert!(!tracker.is_in_backoff(target));
    }

    /// `clear` wipes every tracked bucket.
    #[test]
    fn test_clear_removes_all_backoff_state() {
        let mut tracker = short_interval_backoff();

        let first = Location::new(0.3);
        let second = Location::new(0.7);

        tracker.record_failure(first);
        tracker.record_failure(second);
        assert!(tracker.is_in_backoff(first));
        assert!(tracker.is_in_backoff(second));

        tracker.clear();

        assert!(!tracker.is_in_backoff(first));
        assert!(!tracker.is_in_backoff(second));
        assert_eq!(tracker.inner.len(), 0);
    }
}