Skip to main content

terraphim_service/
http_client.rs

//! Centralized HTTP client creation and configuration for Terraphim services.
//!
//! This module provides shared HTTP client instances with connection pooling
//! to avoid creating new connections for each request. This significantly
//! improves performance for repeated API calls by reusing TCP connections.
6use std::time::Duration;
7
8use once_cell::sync::Lazy;
9use reqwest::Client;
10
/// Request timeout, in seconds, used when a caller does not pick one.
///
/// Applied by the shared default client and used as the fallback timeout in
/// [`create_custom_client`].
pub const DEFAULT_TIMEOUT_SECS: u64 = 30;
13
14/// Default user agent for Terraphim HTTP clients
15pub const DEFAULT_USER_AGENT: &str = concat!(
16    "Terraphim/",
17    env!("CARGO_PKG_VERSION"),
18    " (https://github.com/terraphim/terraphim-ai)"
19);
20
/// Maximum number of idle pooled connections kept per host.
///
/// Tuned for typical API usage: ten idle connections leave room for
/// concurrent requests to the same host.
const POOL_MAX_IDLE_PER_HOST: usize = 10;

/// Idle timeout for pooled connections, in seconds.
///
/// Ninety seconds balances connection reuse against resource cleanup.
const POOL_IDLE_TIMEOUT_SECS: u64 = 90;
28
29/// Global default HTTP client with connection pooling
30///
31/// This client is lazily initialized on first use and reused for all
32/// default HTTP operations. It includes:
33/// - 30-second timeout for requests
34/// - Terraphim user agent header
35/// - Connection pooling (10 idle per host, 90s timeout)
36static DEFAULT_CLIENT: Lazy<Client> = Lazy::new(|| {
37    Client::builder()
38        .timeout(Duration::from_secs(DEFAULT_TIMEOUT_SECS))
39        .user_agent(DEFAULT_USER_AGENT)
40        .pool_max_idle_per_host(POOL_MAX_IDLE_PER_HOST)
41        .pool_idle_timeout(Duration::from_secs(POOL_IDLE_TIMEOUT_SECS))
42        .build()
43        .expect("Failed to build default HTTP client")
44});
45
46/// Global API HTTP client with connection pooling and JSON headers
47///
48/// Optimized for REST API calls with:
49/// - 10-second timeout for responsive APIs
50/// - JSON content type and accept headers
51/// - Connection pooling for repeated API calls
52static API_CLIENT: Lazy<Client> = Lazy::new(|| {
53    use reqwest::header::{ACCEPT, CONTENT_TYPE, HeaderMap, HeaderValue};
54
55    let mut headers = HeaderMap::new();
56    headers.insert(CONTENT_TYPE, HeaderValue::from_static("application/json"));
57    headers.insert(ACCEPT, HeaderValue::from_static("application/json"));
58
59    Client::builder()
60        .timeout(Duration::from_secs(10))
61        .user_agent(DEFAULT_USER_AGENT)
62        .default_headers(headers)
63        .pool_max_idle_per_host(POOL_MAX_IDLE_PER_HOST)
64        .pool_idle_timeout(Duration::from_secs(POOL_IDLE_TIMEOUT_SECS))
65        .build()
66        .expect("Failed to build API HTTP client")
67});
68
69/// Global web scraping HTTP client with connection pooling
70///
71/// Optimized for web scraping with:
72/// - 60-second timeout for slow websites
73/// - Browser-like user agent
74/// - HTML content acceptance
75static SCRAPING_CLIENT: Lazy<Client> = Lazy::new(|| {
76    use reqwest::header::{ACCEPT, HeaderMap, HeaderValue};
77
78    let mut headers = HeaderMap::new();
79    headers.insert(
80        ACCEPT,
81        HeaderValue::from_static("text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"),
82    );
83
84    Client::builder()
85        .timeout(Duration::from_secs(60))
86        .user_agent("Mozilla/5.0 (compatible; Terraphim/1.0; +https://terraphim.ai)")
87        .default_headers(headers)
88        .pool_max_idle_per_host(POOL_MAX_IDLE_PER_HOST)
89        .pool_idle_timeout(Duration::from_secs(POOL_IDLE_TIMEOUT_SECS))
90        .build()
91        .expect("Failed to build scraping HTTP client")
92});
93
94/// Get the global default HTTP client with connection pooling
95///
96/// This client includes:
97/// - 30-second timeout for requests
98/// - Terraphim user agent header
99/// - Connection pooling and keep-alive
100///
101/// Use this for most HTTP operations where no special configuration is needed.
102pub fn get_default_client() -> &'static Client {
103    &DEFAULT_CLIENT
104}
105
106/// Get an HTTP client with custom timeout
107///
108/// Note: This creates a new client instance. For better performance,
109/// prefer `get_default_client()` when possible.
110pub fn create_client_with_timeout(timeout_secs: u64) -> reqwest::Result<Client> {
111    Client::builder()
112        .timeout(Duration::from_secs(timeout_secs))
113        .user_agent(DEFAULT_USER_AGENT)
114        .pool_max_idle_per_host(POOL_MAX_IDLE_PER_HOST)
115        .pool_idle_timeout(Duration::from_secs(POOL_IDLE_TIMEOUT_SECS))
116        .build()
117}
118
119/// Get the global API HTTP client with JSON headers and connection pooling
120///
121/// This client is configured for typical REST API usage:
122/// - Shorter timeout (10 seconds) for responsive APIs
123/// - JSON content type header
124/// - Accept JSON responses
125/// - Connection pooling for repeated API calls
126///
127/// Use this for LLM API calls and other JSON-based APIs.
128pub fn get_api_client() -> &'static Client {
129    &API_CLIENT
130}
131
132/// Create a custom HTTP client with specific configuration
133///
134/// Note: This creates a new client instance. For better performance,
135/// prefer the global clients when possible.
136///
137/// Use this for specialized use cases like:
138/// - Custom headers (API keys, authentication)
139/// - Proxy configuration
140/// - Custom SSL/TLS settings
141pub fn create_custom_client(
142    timeout: Option<Duration>,
143    default_headers: Option<reqwest::header::HeaderMap>,
144    proxy: Option<reqwest::Proxy>,
145) -> reqwest::Result<Client> {
146    let mut builder = Client::builder()
147        .user_agent(DEFAULT_USER_AGENT)
148        .pool_max_idle_per_host(POOL_MAX_IDLE_PER_HOST)
149        .pool_idle_timeout(Duration::from_secs(POOL_IDLE_TIMEOUT_SECS));
150
151    if let Some(timeout) = timeout {
152        builder = builder.timeout(timeout);
153    } else {
154        builder = builder.timeout(Duration::from_secs(DEFAULT_TIMEOUT_SECS));
155    }
156
157    if let Some(headers) = default_headers {
158        builder = builder.default_headers(headers);
159    }
160
161    if let Some(proxy) = proxy {
162        builder = builder.proxy(proxy);
163    }
164
165    builder.build()
166}
167
168/// Get the global web scraping HTTP client with connection pooling
169///
170/// This client is configured for scraping web pages:
171/// - Longer timeout (60 seconds) for slow websites
172/// - Browser-like headers to avoid blocking
173/// - HTML content acceptance
174/// - Connection pooling for repeated requests
175///
176/// Use this for web scraping operations.
177pub fn get_scraping_client() -> &'static Client {
178    &SCRAPING_CLIENT
179}
180
// Backwards-compatibility aliases: these return `Result` so that existing
// call sites using `?` or `unwrap()` keep compiling unchanged.
183
184/// Backwards compatibility: returns a clone of the default client
185///
186/// This function returns `Ok(Client)` for full backwards compatibility.
187/// The client is cheap to clone (internally Arc-based).
188pub fn create_default_client() -> reqwest::Result<Client> {
189    Ok(get_default_client().clone())
190}
191
192/// Backwards compatibility: returns a clone of the API client
193///
194/// This function returns `Ok(Client)` for full backwards compatibility.
195/// The client is cheap to clone (internally Arc-based).
196pub fn create_api_client() -> reqwest::Result<Client> {
197    Ok(get_api_client().clone())
198}
199
200/// Backwards compatibility: returns a clone of the scraping client
201///
202/// This function returns `Ok(Client)` for full backwards compatibility.
203/// The client is cheap to clone (internally Arc-based).
204pub fn create_scraping_client() -> reqwest::Result<Client> {
205    Ok(get_scraping_client().clone())
206}
207
#[cfg(test)]
mod tests {
    use super::*;

    // Each getter must hand out the very same static instance on every call,
    // which is checked by comparing the returned addresses.

    #[test]
    fn test_get_default_client() {
        let (first, second) = (get_default_client(), get_default_client());
        assert!(
            std::ptr::eq(first, second),
            "Should return same client instance"
        );
    }

    #[test]
    fn test_get_api_client() {
        let (first, second) = (get_api_client(), get_api_client());
        assert!(
            std::ptr::eq(first, second),
            "Should return same API client instance"
        );
    }

    #[test]
    fn test_get_scraping_client() {
        let (first, second) = (get_scraping_client(), get_scraping_client());
        assert!(
            std::ptr::eq(first, second),
            "Should return same scraping client instance"
        );
    }

    #[test]
    fn test_create_client_with_timeout() {
        let built = create_client_with_timeout(5);
        assert!(built.is_ok(), "Client with custom timeout should be created");
    }

    #[test]
    fn test_create_custom_client_minimal() {
        // No timeout, headers or proxy: every option falls back to its default.
        let built = create_custom_client(None, None, None);
        assert!(built.is_ok(), "Custom client with no options should work");
    }

    #[test]
    fn test_user_agent_contains_version() {
        let expected_parts = ["Terraphim/", "https://github.com/terraphim/terraphim-ai"];
        for part in expected_parts {
            assert!(DEFAULT_USER_AGENT.contains(part));
        }
    }

    #[test]
    fn test_backwards_compatibility() {
        // The legacy constructors must still return a `Result` holding an owned
        // `Client`; unwrapping fails the test if any of them reports an error.
        let _owned_clients = [
            create_default_client().unwrap(),
            create_api_client().unwrap(),
            create_scraping_client().unwrap(),
        ];

        // The returned clients are clones of the global instances. Only
        // successful creation is verified here.
    }
}
271        // All should be valid clients (clone of global instances)
272        // Note: Client uses Arc internally, so clones share the same connection pool
273        // Just verify they were created successfully
274    }
275}