pulith-fetch 0.2.0

HTTP downloading with streaming verification and atomic placement
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
use std::sync::Arc;
use std::sync::atomic::{AtomicU8, AtomicU64, Ordering};
use std::time::{Duration, Instant};
use tokio::time::sleep;

/// Token bucket algorithm implementation for bandwidth limiting.
///
/// One token represents one byte. The token count, refill rate and congestion
/// state are stored in atomics, and the last-refill timestamp sits behind a
/// mutex-backed wrapper, so every method takes `&self` and the bucket can be
/// shared between tasks (for example inside an `Arc`).
///
/// The adaptive version can dynamically adjust the refill rate based on
/// network conditions and congestion control algorithms.
pub struct TokenBucket {
    /// Tokens (bytes) currently available for acquisition.
    tokens: AtomicU64,
    /// Maximum number of tokens the bucket may hold; refills are capped here.
    capacity: u64,
    /// Refill rate in bytes per second; atomic so it can be changed at runtime.
    refill_rate: AtomicU64,
    /// Time of the most recent refill, used to work out how many tokens accrued.
    last_refill: Arc<AtomicInstant>,
    // Adaptive rate limiting fields
    /// Bounds, thresholds and scaling factors used when adjusting the rate.
    adaptive_config: Arc<AdaptiveConfig>,
    /// Counters describing acquisitions made through this bucket.
    metrics: Arc<RateMetrics>,
    /// Congestion state stored as a raw `u8`: 0: normal, 1: congestion, 2: recovery
    congestion_state: AtomicU8,
}

/// Tunable parameters that drive adaptive rate limiting.
///
/// Rates are expressed in bytes per second; thresholds and factors are plain
/// ratios.
#[derive(Debug, Clone)]
pub struct AdaptiveConfig {
    /// Lower bound for the refill rate (bytes per second)
    pub min_rate: u64,
    /// Upper bound for the refill rate (bytes per second)
    pub max_rate: u64,
    /// Utilization below which the rate may be increased (0.0 to 1.0)
    pub target_utilization: f64,
    /// Utilization above which congestion is assumed (0.0 to 1.0)
    pub congestion_threshold: f64,
    /// Multiplier applied to the rate when congestion is detected (0.0 to 1.0)
    pub recovery_factor: f64,
    /// Multiplier applied to the rate when it is increased (1.0 to 2.0)
    pub growth_factor: f64,
    /// Length of the measurement window between rate adjustments (milliseconds)
    pub measurement_window_ms: u64,
}

impl Default for AdaptiveConfig {
    /// Defaults: 1 KiB/s to 100 MiB/s, grow below 80% utilization, back off
    /// above 95%, halve on congestion, grow by 10%, re-evaluate every second.
    fn default() -> Self {
        const KIB: u64 = 1024;
        const MIB: u64 = KIB * KIB;
        const ONE_SECOND_MS: u64 = 1000;

        AdaptiveConfig {
            min_rate: KIB,
            max_rate: 100 * MIB,
            target_utilization: 0.8,
            congestion_threshold: 0.95,
            recovery_factor: 0.5,
            growth_factor: 1.1,
            measurement_window_ms: ONE_SECOND_MS,
        }
    }
}

/// Metrics for tracking rate limiting performance.
///
/// Every counter is an atomic updated with relaxed ordering: individual
/// counters are always consistent, but a reader may observe a snapshot in
/// which some counters already include an acquisition and others do not.
#[derive(Debug)]
pub struct RateMetrics {
    /// Total bytes acquired
    pub total_bytes: AtomicU64,
    /// Total wait time in microseconds
    pub total_wait_time_us: AtomicU64,
    /// Number of acquisitions
    pub acquisition_count: AtomicU64,
    /// Number of acquisitions that reported a non-zero wait time
    pub wait_count: AtomicU64,
    /// Start of the current measurement window (Unix timestamp in milliseconds)
    pub last_measurement: AtomicU64,
    /// Bytes acquired in current measurement window
    pub window_bytes: AtomicU64,
}

impl Default for RateMetrics {
    /// Creates zeroed counters with the measurement window starting now.
    fn default() -> Self {
        Self {
            total_bytes: AtomicU64::new(0),
            total_wait_time_us: AtomicU64::new(0),
            acquisition_count: AtomicU64::new(0),
            wait_count: AtomicU64::new(0),
            last_measurement: AtomicU64::new(Self::unix_millis_now()),
            window_bytes: AtomicU64::new(0),
        }
    }
}

impl RateMetrics {
    /// Current wall-clock time in milliseconds since the Unix epoch.
    ///
    /// Returns 0 instead of panicking when the system clock is set before the
    /// epoch. The `u128` to `u64` narrowing cannot truncate for any realistic
    /// date.
    fn unix_millis_now() -> u64 {
        std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .map(|since_epoch| since_epoch.as_millis() as u64)
            .unwrap_or(0)
    }

    /// Records one acquisition of `bytes` bytes that waited `wait_time_us`
    /// microseconds.
    ///
    /// Updates the lifetime totals and the current-window byte count. The
    /// acquisition counts towards `wait_count` only when `wait_time_us` is
    /// non-zero.
    pub fn record_acquisition(&self, bytes: u64, wait_time_us: u64) {
        self.total_bytes.fetch_add(bytes, Ordering::Relaxed);
        self.total_wait_time_us
            .fetch_add(wait_time_us, Ordering::Relaxed);
        self.acquisition_count.fetch_add(1, Ordering::Relaxed);
        if wait_time_us > 0 {
            self.wait_count.fetch_add(1, Ordering::Relaxed);
        }
        self.window_bytes.fetch_add(bytes, Ordering::Relaxed);
    }

    /// Returns total bytes divided by total recorded wait time, in bytes per
    /// second.
    ///
    /// Only time spent waiting is counted as elapsed time, so the result is
    /// 0.0 when nothing has been recorded or when no acquisition has waited.
    pub fn get_throughput(&self) -> f64 {
        if self.acquisition_count.load(Ordering::Relaxed) == 0 {
            return 0.0;
        }

        let total_wait_us = self.total_wait_time_us.load(Ordering::Relaxed);
        if total_wait_us == 0 {
            return 0.0;
        }

        let total_bytes = self.total_bytes.load(Ordering::Relaxed);
        total_bytes as f64 / (total_wait_us as f64 / 1_000_000.0)
    }

    /// Returns the current window's bytes as a fraction of what `current_rate`
    /// (bytes per second) would deliver in one second.
    ///
    /// The window length is fixed at one second here regardless of how long
    /// the window has actually been open, so callers using a different window
    /// must rescale the result. Returns 0.0 when `current_rate` is 0.
    pub fn get_utilization(&self, current_rate: u64) -> f64 {
        let window_bytes = self.window_bytes.load(Ordering::Relaxed);
        let window_duration_s = 1.0; // 1 second window
        let expected_bytes = current_rate as f64 * window_duration_s;

        if expected_bytes > 0.0 {
            window_bytes as f64 / expected_bytes
        } else {
            0.0
        }
    }

    /// Starts a new measurement window: clears the window byte count and
    /// stamps the window start with the current wall-clock time.
    pub fn reset_window(&self) {
        self.window_bytes.store(0, Ordering::Relaxed);
        self.last_measurement
            .store(Self::unix_millis_now(), Ordering::Relaxed);
    }
}

/// Congestion-control state of a bucket.
///
/// The explicit discriminants are the raw values stored in an `AtomicU8` via
/// `as u8`, so they must stay in sync with any code matching on those numbers.
#[derive(Debug, Clone, Copy, PartialEq)]
enum CongestionState {
    /// No congestion detected.
    Normal = 0,
    /// Congestion detected.
    Congestion = 1,
    /// Intermediate state between congestion and normal operation.
    Recovery = 2,
}

/// Interior-mutable holder for an [`Instant`] that can be shared by reference.
///
/// Despite the name, the value is protected by a `std::sync::Mutex` rather
/// than a hardware atomic. Each accessor takes the lock only for the duration
/// of a single read or write, so a `get` followed by a `set` is not one atomic
/// step.
#[derive(Debug)]
struct AtomicInstant {
    instant: std::sync::Mutex<Instant>,
}

impl AtomicInstant {
    /// Wraps `instant` as the initial stored value.
    fn new(instant: Instant) -> Self {
        let instant = std::sync::Mutex::new(instant);
        AtomicInstant { instant }
    }

    /// Returns a copy of the stored instant.
    ///
    /// Panics if the mutex has been poisoned.
    fn get(&self) -> Instant {
        let guard = self.instant.lock().unwrap();
        *guard
    }

    /// Overwrites the stored instant with `instant`.
    ///
    /// Panics if the mutex has been poisoned.
    fn set(&self, instant: Instant) {
        let mut guard = self.instant.lock().unwrap();
        *guard = instant;
    }
}

impl TokenBucket {
    /// Create a new TokenBucket with the specified capacity and refill rate.
    ///
    /// The bucket starts full and uses [`AdaptiveConfig::default`] for any
    /// later rate adjustments.
    ///
    /// # Arguments
    ///
    /// * `capacity` - Maximum number of tokens the bucket can hold (in bytes)
    /// * `refill_rate` - Rate at which tokens are refilled (in bytes per second)
    pub fn new(capacity: u64, refill_rate: u64) -> Self {
        Self::new_adaptive(capacity, refill_rate, AdaptiveConfig::default())
    }

    /// Create a new adaptive TokenBucket with custom configuration.
    ///
    /// The bucket starts full. `refill_rate` is used as given; it is not
    /// clamped to the configured bounds until the rate is next adjusted.
    ///
    /// # Arguments
    ///
    /// * `capacity` - Maximum number of tokens the bucket can hold (in bytes)
    /// * `refill_rate` - Initial rate at which tokens are refilled (in bytes per second)
    /// * `config` - Adaptive configuration for rate adjustments
    pub fn new_adaptive(capacity: u64, refill_rate: u64, config: AdaptiveConfig) -> Self {
        Self {
            tokens: AtomicU64::new(capacity),
            capacity,
            refill_rate: AtomicU64::new(refill_rate),
            last_refill: Arc::new(AtomicInstant::new(Instant::now())),
            adaptive_config: Arc::new(config),
            metrics: Arc::new(RateMetrics::default()),
            congestion_state: AtomicU8::new(CongestionState::Normal as u8),
        }
    }

    /// Acquire the specified number of tokens, waiting if necessary.
    ///
    /// This method suspends the calling task until enough tokens are
    /// available. A request larger than the bucket capacity is served in
    /// capacity-sized chunks, so it completes after the corresponding delay
    /// instead of waiting forever. While the refill rate is zero the call
    /// keeps polling until the rate is raised. A bucket created with zero
    /// capacity can never satisfy a non-empty request.
    ///
    /// One acquisition is recorded in the metrics per call; the wait time is
    /// reported as zero when the call never had to sleep.
    ///
    /// # Arguments
    ///
    /// * `bytes` - Number of tokens (bytes) to acquire
    pub async fn acquire(&self, bytes: usize) {
        // Longest single sleep. Bounding it keeps the duration computation
        // finite when the rate is zero or tiny, and lets a waiting task notice
        // a rate change within a second.
        const MAX_SLEEP: Duration = Duration::from_secs(1);

        let tokens_needed = bytes as u64;
        let start_time = Instant::now();

        // The bucket never holds more than `capacity` tokens, so a larger
        // request can only ever be granted piecewise.
        let max_chunk = self.capacity.max(1);
        let mut remaining = tokens_needed;
        let mut waited = false;

        loop {
            let chunk = remaining.min(max_chunk);

            // Refill tokens based on elapsed time
            self.refill();

            match self.take_tokens(chunk) {
                Ok(()) => {
                    remaining -= chunk;
                    if remaining == 0 {
                        break;
                    }
                }
                Err(available) => {
                    // `available < chunk` here, so the deficit is at least 1.
                    let deficit = chunk - available;
                    let current_rate = self.refill_rate.load(Ordering::Relaxed);

                    // A zero rate yields +inf, which falls through to MAX_SLEEP.
                    let wait_secs = deficit as f64 / current_rate as f64;
                    let wait_time = if wait_secs < MAX_SLEEP.as_secs_f64() {
                        Duration::from_secs_f64(wait_secs)
                    } else {
                        MAX_SLEEP
                    };

                    sleep(wait_time).await;
                    waited = true;
                }
            }
        }

        // Only report a wait when the task actually slept; otherwise a few
        // microseconds of CPU time would be counted as a rate-limit stall.
        let wait_time_us = if waited {
            start_time.elapsed().as_micros() as u64
        } else {
            0
        };
        self.metrics.record_acquisition(tokens_needed, wait_time_us);
    }

    /// Try to acquire tokens without waiting.
    ///
    /// Returns true if tokens were acquired, false otherwise. A request larger
    /// than the bucket capacity always returns false. No metrics are recorded.
    ///
    /// # Arguments
    ///
    /// * `bytes` - Number of tokens (bytes) to acquire
    pub fn try_acquire(&self, bytes: usize) -> bool {
        // Refill tokens based on elapsed time
        self.refill();

        self.take_tokens(bytes as u64).is_ok()
    }

    /// Atomically remove `amount` tokens from the bucket.
    ///
    /// Returns `Err` carrying the number of tokens currently available when
    /// there are fewer than `amount`. The update retries internally, so it
    /// never fails merely because another task changed the count concurrently.
    fn take_tokens(&self, amount: u64) -> Result<(), u64> {
        self.tokens
            .fetch_update(Ordering::Relaxed, Ordering::Relaxed, |available| {
                available.checked_sub(amount)
            })
            .map(|_| ())
    }

    /// Refill tokens based on elapsed time since last refill.
    ///
    /// Whole tokens are credited and the refill timestamp advances only by the
    /// time those tokens account for, so fractions of a token accumulate
    /// across calls instead of being discarded. Time spent with a full bucket
    /// or a zero rate earns no credit.
    fn refill(&self) {
        // NOTE(review): reading and writing `last_refill` are two separate
        // lock acquisitions, so two tasks refilling at the same moment can
        // still credit the same interval twice. Closing that window needs an
        // atomic read-modify-write on the timestamp wrapper.
        let now = Instant::now();
        let last_refill = self.last_refill.get();
        let current_rate = self.refill_rate.load(Ordering::Relaxed);

        if current_rate == 0 || self.tokens.load(Ordering::Relaxed) >= self.capacity {
            self.last_refill.set(now);
            return;
        }

        // Saturates to zero if another task already moved the timestamp past `now`.
        let elapsed = now.saturating_duration_since(last_refill);
        // Float-to-integer `as` casts saturate, so a huge product cannot wrap.
        let tokens_to_add = (current_rate as f64 * elapsed.as_secs_f64()) as u64;
        if tokens_to_add == 0 {
            // Leave the timestamp alone so the partial token keeps accruing.
            return;
        }

        let capacity = self.capacity;
        // Add atomically so a concurrent acquisition is never overwritten.
        let previous = match self.tokens.fetch_update(
            Ordering::Relaxed,
            Ordering::Relaxed,
            |tokens| Some(tokens.saturating_add(tokens_to_add).min(capacity)),
        ) {
            Ok(previous) | Err(previous) => previous,
        };

        let next_refill = if previous.saturating_add(tokens_to_add) >= capacity {
            // The bucket filled up; surplus time is not carried over.
            now
        } else {
            let credited = Duration::from_secs_f64(tokens_to_add as f64 / current_rate as f64);
            last_refill
                .checked_add(credited)
                .map_or(now, |advanced| advanced.min(now))
        };
        self.last_refill.set(next_refill);
    }

    /// Get the current number of tokens in the bucket.
    ///
    /// Triggers a refill first, so the value reflects tokens accrued so far.
    pub fn available_tokens(&self) -> u64 {
        self.refill();
        self.tokens.load(Ordering::Relaxed)
    }

    /// Get the current refill rate.
    pub fn current_rate(&self) -> u64 {
        self.refill_rate.load(Ordering::Relaxed)
    }

    /// Check network conditions and adjust the refill rate accordingly.
    ///
    /// Does nothing until at least `measurement_window_ms` of wall-clock time
    /// has passed since the current measurement window started. When the
    /// window is due, utilization is computed as the bytes acquired in the
    /// window divided by what the current rate could deliver over the window's
    /// actual length; the rate is then adjusted and a new window begins. If
    /// several tasks call this at once, only one of them performs the
    /// adjustment.
    pub fn check_and_adjust_rate(&self) {
        let window_ms = self.adaptive_config.measurement_window_ms;
        // Falls back to 0 instead of panicking if the clock is before the epoch.
        let now = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .map(|since_epoch| since_epoch.as_millis() as u64)
            .unwrap_or(0);
        let last_measurement = self.metrics.last_measurement.load(Ordering::Relaxed);

        let elapsed_ms = match now.checked_sub(last_measurement) {
            Some(elapsed_ms) => elapsed_ms,
            None => {
                // The wall clock stepped backwards; restart the window rather
                // than underflowing or stalling until the clock catches up.
                self.metrics.last_measurement.store(now, Ordering::Relaxed);
                return;
            }
        };
        if elapsed_ms < window_ms {
            return;
        }

        // Claim this window; a concurrent caller that loses the race backs off.
        if self
            .metrics
            .last_measurement
            .compare_exchange(last_measurement, now, Ordering::Relaxed, Ordering::Relaxed)
            .is_err()
        {
            return;
        }

        // Read and clear in one step so bytes recorded meanwhile are not lost.
        let window_bytes = self.metrics.window_bytes.swap(0, Ordering::Relaxed);
        let current_rate = self.refill_rate.load(Ordering::Relaxed);
        let expected_bytes = current_rate as f64 * (elapsed_ms as f64 / 1000.0);
        let utilization = if expected_bytes > 0.0 {
            window_bytes as f64 / expected_bytes
        } else {
            0.0
        };

        self.adjust_rate_based_on_conditions(utilization);
    }

    /// Adjust the rate based on current utilization and congestion state.
    ///
    /// * Normal: above the congestion threshold, enter congestion and reduce
    ///   the rate; below the target utilization, increase the rate.
    /// * Congestion: below the target utilization, enter recovery; otherwise
    ///   reduce the rate again.
    /// * Recovery: below the congestion threshold, return to normal and
    ///   increase the rate; otherwise fall back to congestion.
    ///
    /// Reductions never go below `min_rate` and increases never exceed
    /// `max_rate`.
    fn adjust_rate_based_on_conditions(&self, utilization: f64) {
        const NORMAL: u8 = CongestionState::Normal as u8;
        const CONGESTION: u8 = CongestionState::Congestion as u8;
        const RECOVERY: u8 = CongestionState::Recovery as u8;

        let config = &self.adaptive_config;
        let current_rate = self.refill_rate.load(Ordering::Relaxed);
        let reduced_rate =
            (current_rate as f64 * config.recovery_factor).max(config.min_rate as f64) as u64;
        let increased_rate =
            (current_rate as f64 * config.growth_factor).min(config.max_rate as f64) as u64;

        match self.congestion_state.load(Ordering::Relaxed) {
            NORMAL => {
                if utilization > config.congestion_threshold {
                    self.congestion_state.store(CONGESTION, Ordering::Relaxed);
                    self.refill_rate.store(reduced_rate, Ordering::Relaxed);
                } else if utilization < config.target_utilization {
                    self.refill_rate.store(increased_rate, Ordering::Relaxed);
                }
            }
            CONGESTION => {
                if utilization < config.target_utilization {
                    self.congestion_state.store(RECOVERY, Ordering::Relaxed);
                } else {
                    self.refill_rate.store(reduced_rate, Ordering::Relaxed);
                }
            }
            RECOVERY => {
                if utilization < config.congestion_threshold {
                    self.congestion_state.store(NORMAL, Ordering::Relaxed);
                    self.refill_rate.store(increased_rate, Ordering::Relaxed);
                } else {
                    self.congestion_state.store(CONGESTION, Ordering::Relaxed);
                }
            }
            // Only the three values above are ever stored.
            _ => {}
        }
    }

    /// Get current metrics for monitoring.
    pub fn get_metrics(&self) -> &RateMetrics {
        &self.metrics
    }

    /// Force a rate adjustment (useful for testing or manual control).
    ///
    /// The requested rate is limited to the configured `min_rate..=max_rate`
    /// range. If the configuration has `min_rate > max_rate`, `max_rate` wins
    /// rather than the call panicking.
    pub fn set_rate(&self, new_rate: u64) {
        let config = &self.adaptive_config;
        let clamped_rate = new_rate.max(config.min_rate).min(config.max_rate);
        self.refill_rate.store(clamped_rate, Ordering::Relaxed);
    }
}

// Timing-based tests for the token bucket. They rely on real sleeps and
// wall-clock measurements, so the tolerances below are deliberately loose.
#[cfg(test)]
mod tests {
    use super::*;
    use tokio::time::{Duration, sleep};

    // Draining a full bucket is immediate; the next acquisition has to wait
    // for the refill rate to supply the missing tokens.
    #[tokio::test]
    async fn test_token_bucket_basic() {
        // Create a bucket with 100 bytes capacity and 50 bytes/second refill rate
        let bucket = TokenBucket::new(100, 50);

        // Should be able to acquire 50 bytes immediately
        bucket.acquire(50).await;
        assert!(bucket.available_tokens() <= 50);

        // Should be able to acquire another 50 bytes immediately
        bucket.acquire(50).await;
        assert_eq!(bucket.available_tokens(), 0);

        // Sleep 10 ms: at 50 bytes/second that is less than one whole token
        tokio::time::sleep(Duration::from_millis(10)).await;

        // Acquiring more should require waiting
        let start = Instant::now();
        bucket.acquire(25).await;
        let elapsed = start.elapsed();

        // Expect roughly 0.5 seconds (25 bytes at 50 bytes/second), +/- 50 ms
        assert!(elapsed >= Duration::from_millis(450));
        assert!(elapsed <= Duration::from_millis(550));
    }

    // An emptied bucket regains tokens in proportion to elapsed time.
    #[tokio::test]
    async fn test_token_bucket_refill() {
        let bucket = TokenBucket::new(100, 100);

        // Acquire all tokens
        bucket.acquire(100).await;
        assert_eq!(bucket.available_tokens(), 0);

        // Wait for refill
        sleep(Duration::from_millis(100)).await;

        // Expect about 10 bytes (100 bytes/s * 0.1s); both bounds allow for timer jitter
        let available = bucket.available_tokens();
        assert!(available > 5); // Loose lower bound around the expected 10 bytes
        assert!(available <= 15); // Allow some tolerance
    }

    // Ten tasks sharing one bucket each take a tenth of its capacity.
    #[tokio::test]
    async fn test_token_bucket_concurrent() {
        let bucket = Arc::new(TokenBucket::new(1000, 100));
        let mut handles = vec![];

        // Spawn 10 concurrent tasks each trying to acquire 100 bytes
        for _ in 0..10 {
            let bucket_clone = Arc::clone(&bucket);
            let handle = tokio::spawn(async move {
                bucket_clone.acquire(100).await;
            });
            handles.push(handle);
        }

        // Wait for all tasks to complete
        for handle in handles {
            handle.await.unwrap();
        }

        // 10 * 100 bytes equals the capacity, so nothing should be left
        assert_eq!(bucket.available_tokens(), 0);
    }

    // `set_rate` applies the requested rate, clamped to the configured bounds.
    #[tokio::test]
    async fn test_adaptive_rate_limiting() {
        let config = AdaptiveConfig {
            min_rate: 10,
            max_rate: 1000,
            target_utilization: 0.8,
            congestion_threshold: 0.9,
            recovery_factor: 0.5,
            growth_factor: 1.2,
            measurement_window_ms: 100,
        };

        let bucket = TokenBucket::new_adaptive(100, 100, config);

        // Initially should have the configured rate
        assert_eq!(bucket.current_rate(), 100);

        // Force a rate adjustment
        bucket.set_rate(500);
        assert_eq!(bucket.current_rate(), 500);

        // Test that rate is clamped to max
        bucket.set_rate(2000);
        assert_eq!(bucket.current_rate(), 1000);

        // Test that rate is clamped to min
        bucket.set_rate(5);
        assert_eq!(bucket.current_rate(), 10);
    }

    // Heavy use within one measurement window makes the next adjustment
    // lower the refill rate.
    #[tokio::test]
    async fn test_congestion_detection() {
        let config = AdaptiveConfig {
            min_rate: 10,
            max_rate: 1000,
            target_utilization: 0.5,
            congestion_threshold: 0.8,
            recovery_factor: 0.5,
            growth_factor: 1.2,
            measurement_window_ms: 50,
        };

        let bucket = TokenBucket::new_adaptive(1000, 100, config);

        // Simulate high utilization: 200 bytes taken from the initial burst
        for _ in 0..20 {
            bucket.acquire(10).await;
        }

        // Wait until the 50 ms measurement window has elapsed
        sleep(Duration::from_millis(60)).await;

        // Manually trigger rate adjustment
        bucket.check_and_adjust_rate();

        // Acquire more tokens
        bucket.acquire(10).await;

        // Rate should have been reduced below the initial 100 bytes/second
        assert!(bucket.current_rate() < 100);
    }

    // Acquisitions are reflected in the bucket's metrics.
    #[tokio::test]
    async fn test_metrics_collection() {
        let bucket = TokenBucket::new(100, 100);
        let metrics = bucket.get_metrics();

        // Initially no metrics
        assert_eq!(metrics.total_bytes.load(Ordering::Relaxed), 0);
        assert_eq!(metrics.acquisition_count.load(Ordering::Relaxed), 0);

        // Acquire tokens to update metrics
        bucket.acquire(50).await;

        // Metrics should now show the acquisition
        assert!(metrics.total_bytes.load(Ordering::Relaxed) > 0);
        assert!(metrics.acquisition_count.load(Ordering::Relaxed) > 0);
    }
}