Skip to main content

crates_docs/utils/
mod.rs

1//! Utility functions module
2
3use crate::error::{Error, Result};
4use reqwest::Client;
5use reqwest_middleware::ClientBuilder;
6use reqwest_retry::{policies::ExponentialBackoff, RetryTransientMiddleware};
7use std::sync::{Arc, OnceLock};
8use std::time::Duration;
9use tokio::sync::Semaphore;
10
11/// Global HTTP client singleton with connection pool reuse
12///
13/// This static instance ensures connection pooling is effective across
14/// all HTTP requests in the application. The client is lazily initialized
15/// on first access.
16static GLOBAL_HTTP_CLIENT: OnceLock<Arc<reqwest_middleware::ClientWithMiddleware>> =
17    OnceLock::new();
18
19/// Storage for initialization error (if any)
20/// Used to avoid retrying failed initialization
21static INIT_ERROR: OnceLock<String> = OnceLock::new();
22
23/// Initialize the global HTTP client singleton
24///
25/// # Arguments
26///
27/// * `config` - Performance configuration for connection pool settings
28///
29/// # Errors
30///
31/// Returns an error if HTTP client creation fails
32///
33/// # Note
34///
35/// This function is thread-safe and ensures only one thread performs the
36/// expensive client initialization (including TLS setup). Subsequent calls
37/// will return Ok(()) if initialization succeeded, or the original error
38/// if initialization previously failed.
39pub fn init_global_http_client(config: &crate::config::PerformanceConfig) -> Result<()> {
40    // Fast path: already initialized
41    if GLOBAL_HTTP_CLIENT.get().is_some() {
42        return Ok(());
43    }
44
45    // Check if previous initialization failed
46    if let Some(err_msg) = INIT_ERROR.get() {
47        return Err(Error::initialization(
48            "global_http_client",
49            format!("Previous initialization failed: {err_msg}"),
50        ));
51    }
52
53    // Slow path: try to initialize
54    let client_result = create_http_client_from_config(config).build();
55
56    match client_result {
57        Ok(client) => {
58            let client_arc = Arc::new(client);
59            // set() returns Err if already initialized, which is fine
60            let _ = GLOBAL_HTTP_CLIENT.set(client_arc);
61            Ok(())
62        }
63        Err(e) => {
64            let err_msg = format!("Failed to create global HTTP client: {e}");
65            let _ = INIT_ERROR.set(err_msg.clone());
66            Err(Error::initialization("global_http_client", err_msg))
67        }
68    }
69}
70
71/// Get the global HTTP client singleton
72///
73/// # Errors
74///
75/// Returns an error if the global HTTP client has not been initialized.
76/// Call `init_global_http_client()` before using this function.
77///
78/// If you need automatic initialization, use `get_or_init_global_http_client()` instead.
79#[must_use = "returns a Result that should be checked"]
80pub fn get_global_http_client() -> Result<Arc<reqwest_middleware::ClientWithMiddleware>> {
81    GLOBAL_HTTP_CLIENT.get().cloned().ok_or_else(|| {
82        Error::initialization(
83            "global_http_client",
84            "Global HTTP client not initialized. Call init_global_http_client() first.",
85        )
86    })
87}
88
89/// Get or initialize the global HTTP client with default config
90///
91/// This is a convenience function for use cases where the client
92/// might not be explicitly initialized. It uses default performance config.
93///
94/// # Errors
95///
96/// Returns an error if HTTP client creation fails (e.g., TLS initialization error).
97/// This function is thread-safe and ensures only one thread performs initialization.
98pub fn get_or_init_global_http_client() -> Result<Arc<reqwest_middleware::ClientWithMiddleware>> {
99    // Fast path: already initialized
100    if let Some(client) = GLOBAL_HTTP_CLIENT.get() {
101        return Ok(client.clone());
102    }
103
104    // Use init_global_http_client with default config for thread-safe initialization
105    let default_config = crate::config::PerformanceConfig::default();
106    init_global_http_client(&default_config)?;
107
108    // Now it should be initialized
109    GLOBAL_HTTP_CLIENT.get().cloned().ok_or_else(|| {
110        Error::initialization(
111            "global_http_client",
112            "HTTP client initialization failed unexpectedly".to_string(),
113        )
114    })
115}
116
/// HTTP client builder with retry support
///
/// Collects timeout, connection-pool, compression and retry settings and then
/// produces either a `reqwest_middleware::ClientWithMiddleware` with automatic
/// retries for transient failures, or a plain `reqwest::Client`.
#[derive(Debug, Clone)]
pub struct HttpClientBuilder {
    /// Overall request timeout.
    timeout: Duration,
    /// Timeout for establishing a connection.
    connect_timeout: Duration,
    /// Read timeout.
    read_timeout: Duration,
    /// Maximum number of idle pooled connections kept per host.
    pool_max_idle_per_host: usize,
    /// How long an idle pooled connection is kept.
    pool_idle_timeout: Duration,
    /// Value sent as the `User-Agent`.
    user_agent: String,
    /// When `false`, gzip support is switched off on the built client.
    enable_gzip: bool,
    /// When `false`, brotli support is switched off on the built client.
    enable_brotli: bool,
    /// Maximum number of retry attempts.
    max_retries: u32,
    /// Lower bound of the exponential backoff delay.
    retry_initial_delay: Duration,
    /// Upper bound of the exponential backoff delay.
    retry_max_delay: Duration,
}
134
135impl Default for HttpClientBuilder {
136    fn default() -> Self {
137        Self {
138            timeout: Duration::from_secs(30),
139            connect_timeout: Duration::from_secs(10),
140            read_timeout: Duration::from_secs(30),
141            pool_max_idle_per_host: 10,
142            pool_idle_timeout: Duration::from_secs(90),
143            user_agent: format!("CratesDocsMCP/{}", crate::VERSION),
144            enable_gzip: true,
145            enable_brotli: true,
146            max_retries: 3,
147            retry_initial_delay: Duration::from_millis(100),
148            retry_max_delay: Duration::from_secs(10),
149        }
150    }
151}
152
153impl HttpClientBuilder {
154    /// Create a new HTTP client builder
155    #[must_use]
156    pub fn new() -> Self {
157        Self::default()
158    }
159
160    /// Set request timeout
161    #[must_use]
162    pub fn timeout(mut self, timeout: Duration) -> Self {
163        self.timeout = timeout;
164        self
165    }
166
167    /// Set connection timeout
168    #[must_use]
169    pub fn connect_timeout(mut self, connect_timeout: Duration) -> Self {
170        self.connect_timeout = connect_timeout;
171        self
172    }
173
174    /// Set read timeout
175    #[must_use]
176    pub fn read_timeout(mut self, read_timeout: Duration) -> Self {
177        self.read_timeout = read_timeout;
178        self
179    }
180
181    /// Set connection pool size
182    #[must_use]
183    pub fn pool_max_idle_per_host(mut self, max_idle: usize) -> Self {
184        self.pool_max_idle_per_host = max_idle;
185        self
186    }
187
188    /// Set pool idle timeout
189    #[must_use]
190    pub fn pool_idle_timeout(mut self, idle_timeout: Duration) -> Self {
191        self.pool_idle_timeout = idle_timeout;
192        self
193    }
194
195    /// Set User-Agent
196    #[must_use]
197    pub fn user_agent(mut self, user_agent: String) -> Self {
198        self.user_agent = user_agent;
199        self
200    }
201
202    /// Enable/disable Gzip compression
203    #[must_use]
204    pub fn enable_gzip(mut self, enable: bool) -> Self {
205        self.enable_gzip = enable;
206        self
207    }
208
209    /// Enable/disable Brotli compression
210    #[must_use]
211    pub fn enable_brotli(mut self, enable: bool) -> Self {
212        self.enable_brotli = enable;
213        self
214    }
215
216    /// Set max retry attempts
217    #[must_use]
218    pub fn max_retries(mut self, max_retries: u32) -> Self {
219        self.max_retries = max_retries;
220        self
221    }
222
223    /// Set retry initial delay
224    #[must_use]
225    pub fn retry_initial_delay(mut self, delay: Duration) -> Self {
226        self.retry_initial_delay = delay;
227        self
228    }
229
230    /// Set retry max delay
231    #[must_use]
232    pub fn retry_max_delay(mut self, delay: Duration) -> Self {
233        self.retry_max_delay = delay;
234        self
235    }
236
237    /// Build HTTP client with middleware chain
238    ///
239    /// This method builds a `reqwest_middleware::ClientWithMiddleware` that includes
240    /// automatic retry functionality using exponential backoff for transient failures.
241    ///
242    /// # Returns
243    ///
244    /// Returns a `ClientWithMiddleware` that can be used like a regular `reqwest::Client`
245    /// but with automatic retry on transient errors.
246    pub fn build(self) -> Result<reqwest_middleware::ClientWithMiddleware> {
247        let mut builder = Client::builder()
248            .timeout(self.timeout)
249            .connect_timeout(self.connect_timeout)
250            .pool_max_idle_per_host(self.pool_max_idle_per_host)
251            .pool_idle_timeout(self.pool_idle_timeout)
252            .user_agent(&self.user_agent);
253
254        // reqwest 0.13 enables gzip and brotli by default
255        // To disable, use .no_gzip() and .no_brotli()
256        if !self.enable_gzip {
257            builder = builder.no_gzip();
258        }
259
260        if !self.enable_brotli {
261            builder = builder.no_brotli();
262        }
263
264        let client = builder
265            .build()
266            .map_err(|e| Error::http_request("BUILD", "client", 0, e.to_string()))?;
267
268        // Create retry policy with exponential backoff
269        let retry_policy = ExponentialBackoff::builder()
270            .retry_bounds(self.retry_initial_delay, self.retry_max_delay)
271            .build_with_max_retries(self.max_retries);
272
273        // Build client with retry middleware
274        Ok(ClientBuilder::new(client)
275            .with(RetryTransientMiddleware::new_with_policy(retry_policy))
276            .build())
277    }
278
279    /// Build HTTP client without retry support
280    ///
281    /// This method returns a plain `reqwest::Client` without any middleware.
282    /// Use [`build`](Self::build) for retry support.
283    pub fn build_plain(self) -> Result<Client> {
284        let mut builder = Client::builder()
285            .timeout(self.timeout)
286            .connect_timeout(self.connect_timeout)
287            .pool_max_idle_per_host(self.pool_max_idle_per_host)
288            .pool_idle_timeout(self.pool_idle_timeout)
289            .user_agent(&self.user_agent);
290
291        if !self.enable_gzip {
292            builder = builder.no_gzip();
293        }
294
295        if !self.enable_brotli {
296            builder = builder.no_brotli();
297        }
298
299        builder
300            .build()
301            .map_err(|e| Error::http_request("BUILD", "client", 0, e.to_string()))
302    }
303}
304
305/// Create HTTP client builder from performance config
306///
307/// This function creates an `HttpClientBuilder` pre-configured with settings
308/// from the provided `PerformanceConfig`. The resulting client will include
309/// automatic retry functionality.
310#[must_use]
311pub fn create_http_client_from_config(
312    config: &crate::config::PerformanceConfig,
313) -> HttpClientBuilder {
314    HttpClientBuilder::new()
315        .timeout(Duration::from_secs(config.http_client_timeout_secs))
316        .connect_timeout(Duration::from_secs(config.http_client_connect_timeout_secs))
317        .read_timeout(Duration::from_secs(config.http_client_read_timeout_secs))
318        .pool_max_idle_per_host(config.http_client_pool_size)
319        .pool_idle_timeout(Duration::from_secs(
320            config.http_client_pool_idle_timeout_secs,
321        ))
322        .max_retries(config.http_client_max_retries)
323        .retry_initial_delay(Duration::from_millis(
324            config.http_client_retry_initial_delay_ms,
325        ))
326        .retry_max_delay(Duration::from_millis(config.http_client_retry_max_delay_ms))
327}
328
329/// Rate limiter
330pub struct RateLimiter {
331    semaphore: Arc<Semaphore>,
332    max_permits: usize,
333}
334
335impl RateLimiter {
336    /// Create a new rate limiter
337    #[must_use]
338    pub fn new(max_permits: usize) -> Self {
339        Self {
340            semaphore: Arc::new(Semaphore::new(max_permits)),
341            max_permits,
342        }
343    }
344
345    /// Acquire permit (blocks until available)
346    pub async fn acquire(&self) -> Result<tokio::sync::SemaphorePermit<'_>> {
347        self.semaphore
348            .acquire()
349            .await
350            .map_err(|e| Error::Other(format!("Failed to acquire rate limit permit: {e}")))
351    }
352
353    /// Try to acquire permit (non-blocking)
354    #[must_use]
355    pub fn try_acquire(&self) -> Option<tokio::sync::SemaphorePermit<'_>> {
356        self.semaphore.try_acquire().ok()
357    }
358
359    /// Get current number of available permits
360    #[must_use]
361    pub fn available_permits(&self) -> usize {
362        self.semaphore.available_permits()
363    }
364
365    /// Get maximum number of permits
366    #[must_use]
367    pub fn max_permits(&self) -> usize {
368        self.max_permits
369    }
370}
371
372/// Response compression utilities
373pub mod compression {
374    use crate::error::{Error, Result};
375    use flate2::write::GzEncoder;
376    use flate2::Compression;
377    use std::io::Write;
378
379    /// Compress data (Gzip)
380    pub fn gzip_compress(data: &[u8]) -> Result<Vec<u8>> {
381        let mut encoder = GzEncoder::new(Vec::new(), Compression::default());
382        encoder
383            .write_all(data)
384            .map_err(|e| Error::Other(format!("Gzip compression failed: {e}")))?;
385        encoder
386            .finish()
387            .map_err(|e| Error::Other(format!("Gzip compression finalize failed: {e}")))
388    }
389
390    /// Decompress data (Gzip)
391    pub fn gzip_decompress(data: &[u8]) -> Result<Vec<u8>> {
392        let mut decoder = flate2::read::GzDecoder::new(data);
393        let mut decompressed = Vec::new();
394        std::io::Read::read_to_end(&mut decoder, &mut decompressed)
395            .map_err(|e| Error::Other(format!("Gzip decompression failed: {e}")))?;
396        Ok(decompressed)
397    }
398}
399
400/// String utilities
401pub mod string {
402    /// Truncate string and add ellipsis (UTF-8 safe)
403    ///
404    /// # Arguments
405    /// * `s` - The string to truncate
406    /// * `max_len` - Maximum number of characters (not bytes) to keep, including ellipsis
407    ///
408    /// # Examples
409    /// ```
410    /// use crates_docs::utils::string::truncate_with_ellipsis;
411    /// // Basic ASCII truncation
412    /// assert_eq!(truncate_with_ellipsis("hello world", 8), "hello...");
413    /// assert_eq!(truncate_with_ellipsis("short", 10), "short");
414    /// // UTF-8 safe: works with multi-byte characters
415    /// assert_eq!(truncate_with_ellipsis("你好世界", 3), "...");
416    /// assert_eq!(truncate_with_ellipsis("你好世界", 4), "你好世界"); // 4 chars <= max_len, no truncation
417    /// assert_eq!(truncate_with_ellipsis("你好世界", 5), "你好世界"); // 4 chars <= max_len, no truncation
418    /// assert_eq!(truncate_with_ellipsis("你好世界你好", 4), "你...");   // 4 chars > max_len-3, truncate
419    /// ```
420    #[must_use]
421    pub fn truncate_with_ellipsis(s: &str, max_len: usize) -> String {
422        // If max_len is 3 or less, just return ellipsis
423        if max_len <= 3 {
424            return "...".to_string();
425        }
426
427        // Collect characters to properly handle UTF-8
428        let chars: Vec<char> = s.chars().collect();
429
430        // If string is short enough, return it as-is
431        if chars.len() <= max_len {
432            return s.to_string();
433        }
434
435        // Truncate to max_len - 3 characters and add ellipsis
436        let truncated: String = chars.iter().take(max_len - 3).collect();
437        format!("{truncated}...")
438    }
439
440    /// Safely parse number
441    pub fn parse_number<T: std::str::FromStr>(s: &str, default: T) -> T {
442        s.parse().unwrap_or(default)
443    }
444
445    /// Check if string is empty or blank
446    #[must_use]
447    pub fn is_blank(s: &str) -> bool {
448        s.trim().is_empty()
449    }
450}
451
452/// Time utilities
453pub mod time {
454    use chrono::{DateTime, Utc};
455
456    /// Get current timestamp (milliseconds)
457    #[must_use]
458    pub fn current_timestamp_ms() -> i64 {
459        Utc::now().timestamp_millis()
460    }
461
462    /// Format datetime
463    #[must_use]
464    pub fn format_datetime(dt: &DateTime<Utc>) -> String {
465        dt.format("%Y-%m-%d %H:%M:%S%.3f").to_string()
466    }
467
468    /// Calculate elapsed time (milliseconds)
469    #[must_use]
470    pub fn elapsed_ms(start: std::time::Instant) -> u128 {
471        start.elapsed().as_millis()
472    }
473}
474
475/// Validation utilities
476pub mod validation {
477    use crate::error::Error;
478
479    /// Maximum allowed length for crate names (100 characters)
480    const MAX_CRATE_NAME_LENGTH: usize = 100;
481    /// Maximum allowed length for version strings (50 characters)
482    const MAX_VERSION_LENGTH: usize = 50;
483    /// Maximum allowed length for search queries (200 characters)
484    const MAX_SEARCH_QUERY_LENGTH: usize = 200;
485
486    /// Validate crate name
487    pub fn validate_crate_name(name: &str) -> Result<(), Error> {
488        if name.is_empty() {
489            return Err(Error::Other("Crate name cannot be empty".to_string()));
490        }
491
492        if name.len() > MAX_CRATE_NAME_LENGTH {
493            return Err(Error::Other("Crate name is too long".to_string()));
494        }
495
496        // Basic validation: only allow letters, digits, underscores, hyphens
497        if !name
498            .chars()
499            .all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '-')
500        {
501            return Err(Error::Other(
502                "Crate name contains invalid characters".to_string(),
503            ));
504        }
505
506        Ok(())
507    }
508
509    /// Validate version number
510    pub fn validate_version(version: &str) -> Result<(), Error> {
511        if version.is_empty() {
512            return Err(Error::Other("Version cannot be empty".to_string()));
513        }
514
515        if version.len() > MAX_VERSION_LENGTH {
516            return Err(Error::Other("Version is too long".to_string()));
517        }
518
519        // Simple validation: should contain digits and dots
520        if !version.chars().any(|c| c.is_ascii_digit()) {
521            return Err(Error::Other("Version must contain digits".to_string()));
522        }
523
524        Ok(())
525    }
526
527    /// Validate search query
528    pub fn validate_search_query(query: &str) -> Result<(), Error> {
529        if query.is_empty() {
530            return Err(Error::Other("Search query cannot be empty".to_string()));
531        }
532
533        if query.len() > MAX_SEARCH_QUERY_LENGTH {
534            return Err(Error::Other("Search query is too long".to_string()));
535        }
536
537        Ok(())
538    }
539}
540
541/// Performance monitoring
542pub mod metrics {
543    use std::sync::atomic::{AtomicU64, Ordering};
544    use std::sync::Arc;
545    use std::time::Instant;
546
547    /// Performance counter
548    #[derive(Clone)]
549    pub struct PerformanceCounter {
550        total_requests: Arc<AtomicU64>,
551        successful_requests: Arc<AtomicU64>,
552        failed_requests: Arc<AtomicU64>,
553        total_response_time_ms: Arc<AtomicU64>,
554    }
555
556    impl PerformanceCounter {
557        /// Create a new performance counter
558        #[must_use]
559        pub fn new() -> Self {
560            Self {
561                total_requests: Arc::new(AtomicU64::new(0)),
562                successful_requests: Arc::new(AtomicU64::new(0)),
563                failed_requests: Arc::new(AtomicU64::new(0)),
564                total_response_time_ms: Arc::new(AtomicU64::new(0)),
565            }
566        }
567
568        /// Record request start
569        #[must_use]
570        pub fn record_request_start(&self) -> Instant {
571            self.total_requests.fetch_add(1, Ordering::Relaxed);
572            Instant::now()
573        }
574
575        /// Record request completion
576        #[allow(clippy::cast_possible_truncation)]
577        pub fn record_request_complete(&self, start: Instant, success: bool) {
578            let duration_ms = start.elapsed().as_millis() as u64;
579            self.total_response_time_ms
580                .fetch_add(duration_ms, Ordering::Relaxed);
581
582            if success {
583                self.successful_requests.fetch_add(1, Ordering::Relaxed);
584            } else {
585                self.failed_requests.fetch_add(1, Ordering::Relaxed);
586            }
587        }
588
589        /// Get statistics
590        #[must_use]
591        pub fn get_stats(&self) -> PerformanceStats {
592            let total = self.total_requests.load(Ordering::Relaxed);
593            let success = self.successful_requests.load(Ordering::Relaxed);
594            let failed = self.failed_requests.load(Ordering::Relaxed);
595            let total_time = self.total_response_time_ms.load(Ordering::Relaxed);
596
597            #[allow(clippy::cast_precision_loss)]
598            let avg_response_time = if total > 0 {
599                total_time as f64 / total as f64
600            } else {
601                0.0
602            };
603
604            #[allow(clippy::cast_precision_loss)]
605            let success_rate = if total > 0 {
606                success as f64 / total as f64 * 100.0
607            } else {
608                0.0
609            };
610
611            PerformanceStats {
612                total_requests: total,
613                successful_requests: success,
614                failed_requests: failed,
615                average_response_time_ms: avg_response_time,
616                success_rate_percent: success_rate,
617            }
618        }
619
620        /// Reset counter
621        pub fn reset(&self) {
622            self.total_requests.store(0, Ordering::Relaxed);
623            self.successful_requests.store(0, Ordering::Relaxed);
624            self.failed_requests.store(0, Ordering::Relaxed);
625            self.total_response_time_ms.store(0, Ordering::Relaxed);
626        }
627    }
628
629    impl Default for PerformanceCounter {
630        fn default() -> Self {
631            Self::new()
632        }
633    }
634
635    /// Performance statistics
636    #[derive(Debug, Clone, serde::Serialize)]
637    pub struct PerformanceStats {
638        /// Total requests
639        pub total_requests: u64,
640        /// Successful requests
641        pub successful_requests: u64,
642        /// Failed requests
643        pub failed_requests: u64,
644        /// Average response time (milliseconds)
645        pub average_response_time_ms: f64,
646        /// Success rate (percentage)
647        pub success_rate_percent: f64,
648    }
649}