Skip to main content

cortex_runtime/cartography/
rate_limiter.rs

1//! Rate limiter for polite crawling.
2
3use std::sync::Arc;
4use std::time::Duration;
5use tokio::sync::Semaphore;
6use tokio::time::Instant;
7
8/// Rate limiter that enforces concurrency limits and minimum delays.
9pub struct RateLimiter {
10    semaphore: Arc<Semaphore>,
11    min_delay: Duration,
12    last_request: tokio::sync::Mutex<Instant>,
13}
14
15impl RateLimiter {
16    /// Create a new rate limiter.
17    ///
18    /// - `max_concurrent`: maximum number of concurrent requests
19    /// - `min_delay_ms`: minimum milliseconds between requests
20    pub fn new(max_concurrent: usize, min_delay_ms: u64) -> Self {
21        Self {
22            semaphore: Arc::new(Semaphore::new(max_concurrent)),
23            min_delay: Duration::from_millis(min_delay_ms),
24            last_request: tokio::sync::Mutex::new(Instant::now()),
25        }
26    }
27
28    /// Create a rate limiter from robots.txt crawl delay.
29    pub fn from_crawl_delay(crawl_delay: Option<f32>, max_concurrent: usize) -> Self {
30        let delay_ms = crawl_delay.map(|d| (d * 1000.0) as u64).unwrap_or(50);
31        Self::new(max_concurrent, delay_ms)
32    }
33
34    /// Acquire permission to make a request. Blocks until rate limit allows.
35    pub async fn acquire(&self) -> RateLimitGuard {
36        // Acquire semaphore permit
37        let permit = self.semaphore.clone().acquire_owned().await.unwrap();
38
39        // Enforce minimum delay
40        {
41            let mut last = self.last_request.lock().await;
42            let elapsed = last.elapsed();
43            if elapsed < self.min_delay {
44                tokio::time::sleep(self.min_delay - elapsed).await;
45            }
46            *last = Instant::now();
47        }
48
49        RateLimitGuard { _permit: permit }
50    }
51}
52
53/// Guard that releases the rate limiter permit when dropped.
54pub struct RateLimitGuard {
55    _permit: tokio::sync::OwnedSemaphorePermit,
56}
57
#[cfg(test)]
mod tests {
    use super::*;
    use std::time::Duration;
    use tokio::time::Instant;

    /// Both permits can be taken, grants are spaced by the minimum delay, and
    /// dropping a guard returns its permit to the semaphore.
    #[tokio::test]
    async fn test_rate_limiter_basic() {
        let limiter = RateLimiter::new(2, 10);
        let start = Instant::now();

        let g1 = limiter.acquire().await;
        let g2 = limiter.acquire().await;

        // The second grant is at least `min_delay` after the first one, and
        // the first one happened after `start`.
        assert!(start.elapsed() >= Duration::from_millis(10));

        // Both acquired successfully (max_concurrent=2), so no permits remain.
        assert_eq!(limiter.semaphore.available_permits(), 0);

        drop(g1);
        assert_eq!(limiter.semaphore.available_permits(), 1);
        drop(g2);
        assert_eq!(limiter.semaphore.available_permits(), 2);
    }

    /// A crawl delay in seconds becomes the minimum delay in milliseconds, and
    /// a missing crawl delay falls back to 50 ms.
    #[tokio::test]
    async fn test_rate_limiter_from_crawl_delay() {
        let limiter = RateLimiter::from_crawl_delay(Some(0.5), 3);
        assert_eq!(limiter.min_delay, Duration::from_millis(500));
        assert_eq!(limiter.semaphore.available_permits(), 3);

        let fallback = RateLimiter::from_crawl_delay(None, 1);
        assert_eq!(fallback.min_delay, Duration::from_millis(50));

        // Acquiring through the fallback limiter keeps the test fast while
        // still exercising the acquire path of a crawl-delay-built limiter.
        let _g = fallback.acquire().await;
        assert_eq!(fallback.semaphore.available_permits(), 0);
    }
}