Skip to main content

lychee_lib/ratelimit/
pool.rs

1use dashmap::DashMap;
2use http::Method;
3use reqwest::{Client, Request};
4use std::collections::HashMap;
5use std::sync::Arc;
6
7use crate::ratelimit::{
8    CacheableResponse, Host, HostConfigs, HostKey, HostStats, HostStatsMap, RateLimitConfig,
9};
10use crate::types::Result;
11use crate::{ErrorKind, Uri};
12
13/// Keep track of host-specific [`reqwest::Client`]s
14pub type ClientMap = HashMap<HostKey, reqwest::Client>;
15
16/// Manages a pool of Host instances and routes requests to appropriate hosts.
17///
18/// The `HostPool` serves as the central coordinator for per-host rate limiting.
19/// It creates host instances on-demand and provides a unified interface for
20/// executing HTTP requests with appropriate rate limiting applied.
21///
22/// # Architecture
23///
24/// - Each unique hostname gets its own Host instance with dedicated rate limiting
25/// - Hosts are created lazily when first requested
26/// - Thread-safe using `DashMap` for concurrent access to host instances
27#[derive(Debug)]
28pub struct HostPool {
29    /// Map of hostname to Host instances, created on-demand
30    hosts: DashMap<HostKey, Arc<Host>>,
31
32    /// Global configuration for rate limiting defaults
33    global_config: RateLimitConfig,
34
35    /// Per-host configuration overrides
36    host_configs: HostConfigs,
37
38    /// Fallback client for hosts without host-specific client
39    default_client: Client,
40
41    /// Host-specific clients
42    client_map: ClientMap,
43}
44
45impl HostPool {
46    /// Create a new `HostPool` with the given configuration
47    #[must_use]
48    pub fn new(
49        global_config: RateLimitConfig,
50        host_configs: HostConfigs,
51        default_client: Client,
52        client_map: ClientMap,
53    ) -> Self {
54        Self {
55            hosts: DashMap::new(),
56            global_config,
57            host_configs,
58            default_client,
59            client_map,
60        }
61    }
62
63    /// Try to execute a [`Request`] with appropriate per-host rate limiting.
64    ///
65    /// # Errors
66    ///
67    /// Fails if:
68    /// - The request URL has no valid hostname
69    /// - The underlying HTTP request fails
70    pub(crate) async fn execute_request(
71        &self,
72        request: Request,
73        needs_body: bool,
74    ) -> Result<CacheableResponse> {
75        let url = request.url();
76        let host_key = HostKey::try_from(url)?;
77        let host = self.get_or_create_host(host_key);
78        host.execute_request(request, needs_body).await
79    }
80
81    /// Try to build a [`Request`]
82    ///
83    /// # Errors
84    ///
85    /// Fails if:
86    /// - The request URI has no valid hostname
87    /// - The request fails to build
88    pub fn build_request(&self, method: Method, uri: &Uri) -> Result<Request> {
89        let host_key = HostKey::try_from(uri)?;
90        let host = self.get_or_create_host(host_key);
91        host.get_client()
92            .request(method, uri.url.clone())
93            .build()
94            .map_err(ErrorKind::BuildRequestClient)
95    }
96
97    /// Get an existing host or create a new one for the given hostname
98    fn get_or_create_host(&self, host_key: HostKey) -> Arc<Host> {
99        self.hosts
100            .entry(host_key.clone())
101            .or_insert_with(|| {
102                let host_config = self
103                    .host_configs
104                    .get(&host_key)
105                    .cloned()
106                    .unwrap_or_default();
107
108                let client = self
109                    .client_map
110                    .get(&host_key)
111                    .unwrap_or(&self.default_client)
112                    .clone();
113
114                Arc::new(Host::new(
115                    host_key,
116                    &host_config,
117                    &self.global_config,
118                    client,
119                ))
120            })
121            .value()
122            .clone()
123    }
124
125    /// Returns statistics for the host if it exists, otherwise returns empty stats.
126    /// This provides consistent behavior whether or not requests have been made to that host yet.
127    #[must_use]
128    pub fn host_stats(&self, hostname: &str) -> HostStats {
129        let host_key = HostKey::from(hostname);
130        self.hosts
131            .get(&host_key)
132            .map(|host| host.stats())
133            .unwrap_or_default()
134    }
135
136    /// Returns a `HashMap` mapping hostnames to their statistics.
137    /// Only hosts that have had requests will be included.
138    #[must_use]
139    pub fn all_host_stats(&self) -> HostStatsMap {
140        HostStatsMap::from(
141            self.hosts
142                .iter()
143                .map(|entry| {
144                    let hostname = entry.key().to_string();
145                    let stats = entry.value().stats();
146                    (hostname, stats)
147                })
148                .collect::<HashMap<_, _>>(),
149        )
150    }
151
152    /// Get the number of host instances that have been created,
153    /// which corresponds to the number of unique hostnames that have
154    /// been accessed.
155    #[must_use]
156    pub fn active_host_count(&self) -> usize {
157        self.hosts.len()
158    }
159
160    /// Get  a copy of the current host-specific configurations.
161    /// This is useful for debugging or runtime monitoring of configuration.
162    #[must_use]
163    pub fn host_configurations(&self) -> HostConfigs {
164        self.host_configs.clone()
165    }
166
167    /// Remove a host from the pool.
168    ///
169    /// This forces the host to be recreated with updated configuration
170    /// the next time a request is made to it. Any ongoing requests to
171    /// that host will continue with the old instance.
172    ///
173    /// # Returns
174    ///
175    /// Returns true if a host was removed, false if no host existed for that hostname.
176    #[must_use]
177    pub fn remove_host(&self, hostname: &str) -> bool {
178        let host_key = HostKey::from(hostname);
179        self.hosts.remove(&host_key).is_some()
180    }
181
182    /// Get cache statistics across all hosts
183    #[must_use]
184    pub fn cache_stats(&self) -> HashMap<String, (usize, f64)> {
185        self.hosts
186            .iter()
187            .map(|entry| {
188                let hostname = entry.key().to_string();
189                let cache_size = entry.value().cache_size();
190                let hit_rate = entry.value().stats().cache_hit_rate();
191                (hostname, (cache_size, hit_rate))
192            })
193            .collect()
194    }
195
196    /// Record a cache hit for the given URI in host statistics.
197    /// This tracks that a request was served from the persistent disk cache.
198    /// Note that no equivalent function for tracking cache misses is exposed,
199    /// since this is handled internally.
200    pub fn record_persistent_cache_hit(&self, uri: &crate::Uri) {
201        if !uri.is_file() && !uri.is_mail() {
202            match crate::ratelimit::HostKey::try_from(uri) {
203                Ok(key) => {
204                    let host = self.get_or_create_host(key);
205                    host.record_persistent_cache_hit();
206                }
207                Err(e) => {
208                    log::debug!("Failed to record cache hit for {uri}: {e}");
209                }
210            }
211        }
212    }
213}
214
215impl Default for HostPool {
216    fn default() -> Self {
217        Self::new(
218            RateLimitConfig::default(),
219            HostConfigs::default(),
220            Client::default(),
221            HashMap::new(),
222        )
223    }
224}
225
#[cfg(test)]
mod tests {
    use super::*;
    use crate::ratelimit::RateLimitConfig;

    use url::Url;

    /// Parse `raw` as a URL and derive the [`HostKey`] for it.
    fn host_key_of(raw: &str) -> HostKey {
        let url: Url = raw.parse().unwrap();
        HostKey::try_from(&url).unwrap()
    }

    #[test]
    fn test_host_pool_creation() {
        let (rate_limits, overrides) = (RateLimitConfig::default(), HostConfigs::default());
        let pool = HostPool::new(rate_limits, overrides, Client::default(), HashMap::new());

        assert_eq!(pool.active_host_count(), 0);
    }

    #[test]
    fn test_host_pool_default() {
        assert_eq!(HostPool::default().active_host_count(), 0);
    }

    #[tokio::test]
    async fn test_host_creation_on_demand() {
        let pool = HostPool::default();
        let key = host_key_of("https://example.com/path");

        // A fresh pool holds no hosts and reports empty stats
        assert_eq!(pool.active_host_count(), 0);
        assert_eq!(pool.host_stats("example.com").total_requests, 0);

        // The first lookup creates the host
        let host = pool.get_or_create_host(key);

        // Exactly one host exists now, and it has not served any request
        assert_eq!(pool.active_host_count(), 1);
        assert_eq!(pool.host_stats("example.com").total_requests, 0);
        assert_eq!(host.key.as_str(), "example.com");
    }

    #[tokio::test]
    async fn test_host_reuse() {
        let pool = HostPool::default();
        let first_key = host_key_of("https://example.com/path1");
        let second_key = host_key_of("https://example.com/path2");

        // The first lookup creates the host
        let first = pool.get_or_create_host(first_key);
        assert_eq!(pool.active_host_count(), 1);

        // A different path on the same host must not create another one
        let second = pool.get_or_create_host(second_key);
        assert_eq!(pool.active_host_count(), 1);

        // Both lookups hand out the very same instance
        assert!(Arc::ptr_eq(&first, &second));
    }

    #[test]
    fn test_host_config_management() {
        // A default pool carries no host-specific configuration
        let configs = HostPool::default().host_configurations();
        assert_eq!(configs.len(), 0);
    }

    #[test]
    fn test_host_removal() {
        let pool = HostPool::default();

        // Removing a host that was never created reports `false`
        assert!(!pool.remove_host("nonexistent.com"));

        // Only the miss path is covered here; removal of an existing host
        // is not exercised by this test
    }
}