Skip to main content

crates_docs/utils/
mod.rs

1//! Utility functions module
2
3use crate::error::{Error, Result};
4use reqwest::Client;
5use reqwest_middleware::ClientBuilder;
6use reqwest_retry::{policies::ExponentialBackoff, RetryTransientMiddleware};
7use std::sync::{Arc, OnceLock};
8use std::time::Duration;
9use tokio::sync::Semaphore;
10
/// Global HTTP client singleton with connection pool reuse
///
/// Holds the single shared `ClientWithMiddleware` behind an `Arc`, so every
/// caller that obtains the client works with the same instance. The cell
/// starts out empty and is filled by [`init_global_http_client`] (called
/// directly or through [`get_or_init_global_http_client`]); once a value has
/// been stored it is never replaced.
static GLOBAL_HTTP_CLIENT: OnceLock<Arc<reqwest_middleware::ClientWithMiddleware>> =
    OnceLock::new();

/// Storage for the initialization error message (if any)
///
/// Written by [`init_global_http_client`] when building the client fails and
/// read back on later calls, so a failed initialization is reported again
/// instead of being retried.
static INIT_ERROR: OnceLock<String> = OnceLock::new();
22
23/// Initialize the global HTTP client singleton
24///
25/// # Arguments
26///
27/// * `config` - Performance configuration for connection pool settings
28///
29/// # Errors
30///
31/// Returns an error if HTTP client creation fails
32///
33/// # Note
34///
35/// This function is thread-safe and ensures only one thread performs the
36/// expensive client initialization (including TLS setup). Subsequent calls
37/// will return Ok(()) if initialization succeeded, or the original error
38/// if initialization previously failed.
39pub fn init_global_http_client(config: &crate::config::PerformanceConfig) -> Result<()> {
40    // Fast path: already initialized
41    if GLOBAL_HTTP_CLIENT.get().is_some() {
42        return Ok(());
43    }
44
45    // Check if previous initialization failed
46    if let Some(err_msg) = INIT_ERROR.get() {
47        return Err(Error::initialization(
48            "global_http_client",
49            format!("Previous initialization failed: {err_msg}"),
50        ));
51    }
52
53    // Slow path: try to initialize
54    let client_result = create_http_client_from_config(config).build();
55
56    match client_result {
57        Ok(client) => {
58            let client_arc = Arc::new(client);
59            // set() returns Err if already initialized, which is fine
60            let _ = GLOBAL_HTTP_CLIENT.set(client_arc);
61            Ok(())
62        }
63        Err(e) => {
64            let err_msg = format!("Failed to create global HTTP client: {e}");
65            let _ = INIT_ERROR.set(err_msg.clone());
66            Err(Error::initialization("global_http_client", err_msg))
67        }
68    }
69}
70
71/// Get the global HTTP client singleton
72///
73/// # Panics
74///
75/// Panics if the global HTTP client has not been initialized.
76/// Call `init_global_http_client()` before using this function.
77#[must_use]
78pub fn get_global_http_client() -> Arc<reqwest_middleware::ClientWithMiddleware> {
79    GLOBAL_HTTP_CLIENT
80        .get()
81        .cloned()
82        .expect("Global HTTP client not initialized. Call init_global_http_client() first.")
83}
84
85/// Get or initialize the global HTTP client with default config
86///
87/// This is a convenience function for use cases where the client
88/// might not be explicitly initialized. It uses default performance config.
89///
90/// # Errors
91///
92/// Returns an error if HTTP client creation fails (e.g., TLS initialization error).
93/// This function is thread-safe and ensures only one thread performs initialization.
94pub fn get_or_init_global_http_client() -> Result<Arc<reqwest_middleware::ClientWithMiddleware>> {
95    // Fast path: already initialized
96    if let Some(client) = GLOBAL_HTTP_CLIENT.get() {
97        return Ok(client.clone());
98    }
99
100    // Use init_global_http_client with default config for thread-safe initialization
101    let default_config = crate::config::PerformanceConfig::default();
102    init_global_http_client(&default_config)?;
103
104    // Now it should be initialized
105    GLOBAL_HTTP_CLIENT.get().cloned().ok_or_else(|| {
106        Error::initialization(
107            "global_http_client",
108            "HTTP client initialization failed unexpectedly".to_string(),
109        )
110    })
111}
112
/// HTTP client builder with retry support
///
/// This builder creates a `reqwest_middleware::ClientWithMiddleware` that includes
/// automatic retry functionality for transient failures.
///
/// The builder is plain configuration data, so it can be cloned and printed
/// with `{:?}` for diagnostics.
#[derive(Debug, Clone)]
pub struct HttpClientBuilder {
    /// Request timeout
    timeout: Duration,
    /// Connection timeout
    connect_timeout: Duration,
    /// Read timeout
    read_timeout: Duration,
    /// Connection pool size (maximum idle connections per host)
    pool_max_idle_per_host: usize,
    /// Pool idle timeout
    pool_idle_timeout: Duration,
    /// Value sent as the `User-Agent`
    user_agent: String,
    /// Whether Gzip compression stays enabled
    enable_gzip: bool,
    /// Whether Brotli compression stays enabled
    enable_brotli: bool,
    /// Max retry attempts
    max_retries: u32,
    /// Retry initial delay
    retry_initial_delay: Duration,
    /// Retry max delay
    retry_max_delay: Duration,
}
130
131impl Default for HttpClientBuilder {
132    fn default() -> Self {
133        Self {
134            timeout: Duration::from_secs(30),
135            connect_timeout: Duration::from_secs(10),
136            read_timeout: Duration::from_secs(30),
137            pool_max_idle_per_host: 10,
138            pool_idle_timeout: Duration::from_secs(90),
139            user_agent: format!("CratesDocsMCP/{}", crate::VERSION),
140            enable_gzip: true,
141            enable_brotli: true,
142            max_retries: 3,
143            retry_initial_delay: Duration::from_millis(100),
144            retry_max_delay: Duration::from_secs(10),
145        }
146    }
147}
148
149impl HttpClientBuilder {
150    /// Create a new HTTP client builder
151    #[must_use]
152    pub fn new() -> Self {
153        Self::default()
154    }
155
156    /// Set request timeout
157    #[must_use]
158    pub fn timeout(mut self, timeout: Duration) -> Self {
159        self.timeout = timeout;
160        self
161    }
162
163    /// Set connection timeout
164    #[must_use]
165    pub fn connect_timeout(mut self, connect_timeout: Duration) -> Self {
166        self.connect_timeout = connect_timeout;
167        self
168    }
169
170    /// Set read timeout
171    #[must_use]
172    pub fn read_timeout(mut self, read_timeout: Duration) -> Self {
173        self.read_timeout = read_timeout;
174        self
175    }
176
177    /// Set connection pool size
178    #[must_use]
179    pub fn pool_max_idle_per_host(mut self, max_idle: usize) -> Self {
180        self.pool_max_idle_per_host = max_idle;
181        self
182    }
183
184    /// Set pool idle timeout
185    #[must_use]
186    pub fn pool_idle_timeout(mut self, idle_timeout: Duration) -> Self {
187        self.pool_idle_timeout = idle_timeout;
188        self
189    }
190
191    /// Set User-Agent
192    #[must_use]
193    pub fn user_agent(mut self, user_agent: String) -> Self {
194        self.user_agent = user_agent;
195        self
196    }
197
198    /// Enable/disable Gzip compression
199    #[must_use]
200    pub fn enable_gzip(mut self, enable: bool) -> Self {
201        self.enable_gzip = enable;
202        self
203    }
204
205    /// Enable/disable Brotli compression
206    #[must_use]
207    pub fn enable_brotli(mut self, enable: bool) -> Self {
208        self.enable_brotli = enable;
209        self
210    }
211
212    /// Set max retry attempts
213    #[must_use]
214    pub fn max_retries(mut self, max_retries: u32) -> Self {
215        self.max_retries = max_retries;
216        self
217    }
218
219    /// Set retry initial delay
220    #[must_use]
221    pub fn retry_initial_delay(mut self, delay: Duration) -> Self {
222        self.retry_initial_delay = delay;
223        self
224    }
225
226    /// Set retry max delay
227    #[must_use]
228    pub fn retry_max_delay(mut self, delay: Duration) -> Self {
229        self.retry_max_delay = delay;
230        self
231    }
232
233    /// Build HTTP client with middleware chain
234    ///
235    /// This method builds a `reqwest_middleware::ClientWithMiddleware` that includes
236    /// automatic retry functionality using exponential backoff for transient failures.
237    ///
238    /// # Returns
239    ///
240    /// Returns a `ClientWithMiddleware` that can be used like a regular `reqwest::Client`
241    /// but with automatic retry on transient errors.
242    pub fn build(self) -> Result<reqwest_middleware::ClientWithMiddleware> {
243        let mut builder = Client::builder()
244            .timeout(self.timeout)
245            .connect_timeout(self.connect_timeout)
246            .pool_max_idle_per_host(self.pool_max_idle_per_host)
247            .pool_idle_timeout(self.pool_idle_timeout)
248            .user_agent(&self.user_agent);
249
250        // reqwest 0.13 enables gzip and brotli by default
251        // To disable, use .no_gzip() and .no_brotli()
252        if !self.enable_gzip {
253            builder = builder.no_gzip();
254        }
255
256        if !self.enable_brotli {
257            builder = builder.no_brotli();
258        }
259
260        let client = builder
261            .build()
262            .map_err(|e| Error::http_request("BUILD", "client", 0, e.to_string()))?;
263
264        // Create retry policy with exponential backoff
265        let retry_policy = ExponentialBackoff::builder()
266            .retry_bounds(self.retry_initial_delay, self.retry_max_delay)
267            .build_with_max_retries(self.max_retries);
268
269        // Build client with retry middleware
270        Ok(ClientBuilder::new(client)
271            .with(RetryTransientMiddleware::new_with_policy(retry_policy))
272            .build())
273    }
274
275    /// Build HTTP client without retry support
276    ///
277    /// This method returns a plain `reqwest::Client` without any middleware.
278    /// Use [`build`](Self::build) for retry support.
279    pub fn build_plain(self) -> Result<Client> {
280        let mut builder = Client::builder()
281            .timeout(self.timeout)
282            .connect_timeout(self.connect_timeout)
283            .pool_max_idle_per_host(self.pool_max_idle_per_host)
284            .pool_idle_timeout(self.pool_idle_timeout)
285            .user_agent(&self.user_agent);
286
287        if !self.enable_gzip {
288            builder = builder.no_gzip();
289        }
290
291        if !self.enable_brotli {
292            builder = builder.no_brotli();
293        }
294
295        builder
296            .build()
297            .map_err(|e| Error::http_request("BUILD", "client", 0, e.to_string()))
298    }
299}
300
301/// Create HTTP client builder from performance config
302///
303/// This function creates an `HttpClientBuilder` pre-configured with settings
304/// from the provided `PerformanceConfig`. The resulting client will include
305/// automatic retry functionality.
306#[must_use]
307pub fn create_http_client_from_config(
308    config: &crate::config::PerformanceConfig,
309) -> HttpClientBuilder {
310    HttpClientBuilder::new()
311        .timeout(Duration::from_secs(config.http_client_timeout_secs))
312        .connect_timeout(Duration::from_secs(config.http_client_connect_timeout_secs))
313        .read_timeout(Duration::from_secs(config.http_client_read_timeout_secs))
314        .pool_max_idle_per_host(config.http_client_pool_size)
315        .pool_idle_timeout(Duration::from_secs(
316            config.http_client_pool_idle_timeout_secs,
317        ))
318        .max_retries(config.http_client_max_retries)
319        .retry_initial_delay(Duration::from_millis(
320            config.http_client_retry_initial_delay_ms,
321        ))
322        .retry_max_delay(Duration::from_millis(config.http_client_retry_max_delay_ms))
323}
324
325/// Rate limiter
326pub struct RateLimiter {
327    semaphore: Arc<Semaphore>,
328    max_permits: usize,
329}
330
331impl RateLimiter {
332    /// Create a new rate limiter
333    #[must_use]
334    pub fn new(max_permits: usize) -> Self {
335        Self {
336            semaphore: Arc::new(Semaphore::new(max_permits)),
337            max_permits,
338        }
339    }
340
341    /// Acquire permit (blocks until available)
342    pub async fn acquire(&self) -> Result<tokio::sync::SemaphorePermit<'_>> {
343        self.semaphore
344            .acquire()
345            .await
346            .map_err(|e| Error::Other(format!("Failed to acquire rate limit permit: {e}")))
347    }
348
349    /// Try to acquire permit (non-blocking)
350    #[must_use]
351    pub fn try_acquire(&self) -> Option<tokio::sync::SemaphorePermit<'_>> {
352        self.semaphore.try_acquire().ok()
353    }
354
355    /// Get current number of available permits
356    #[must_use]
357    pub fn available_permits(&self) -> usize {
358        self.semaphore.available_permits()
359    }
360
361    /// Get maximum number of permits
362    #[must_use]
363    pub fn max_permits(&self) -> usize {
364        self.max_permits
365    }
366}
367
368/// Response compression utilities
369pub mod compression {
370    use crate::error::{Error, Result};
371    use flate2::write::GzEncoder;
372    use flate2::Compression;
373    use std::io::Write;
374
375    /// Compress data (Gzip)
376    pub fn gzip_compress(data: &[u8]) -> Result<Vec<u8>> {
377        let mut encoder = GzEncoder::new(Vec::new(), Compression::default());
378        encoder
379            .write_all(data)
380            .map_err(|e| Error::Other(format!("Gzip compression failed: {e}")))?;
381        encoder
382            .finish()
383            .map_err(|e| Error::Other(format!("Gzip compression finalize failed: {e}")))
384    }
385
386    /// Decompress data (Gzip)
387    pub fn gzip_decompress(data: &[u8]) -> Result<Vec<u8>> {
388        let mut decoder = flate2::read::GzDecoder::new(data);
389        let mut decompressed = Vec::new();
390        std::io::Read::read_to_end(&mut decoder, &mut decompressed)
391            .map_err(|e| Error::Other(format!("Gzip decompression failed: {e}")))?;
392        Ok(decompressed)
393    }
394}
395
/// String utilities
pub mod string {
    /// Truncate string and add ellipsis (UTF-8 safe)
    ///
    /// Lengths are counted in characters (`char`s), not bytes, so multi-byte
    /// text is never cut in the middle of a code point. The result never has
    /// more than `max_len` characters:
    ///
    /// * a string that already fits is returned unchanged, whatever `max_len` is;
    /// * otherwise the first `max_len - 3` characters are kept and `...` is appended;
    /// * if `max_len` is 3 or less there is no room for text next to the
    ///   ellipsis, so a too-long string becomes `max_len` dots.
    ///
    /// # Arguments
    /// * `s` - The string to truncate
    /// * `max_len` - Maximum number of characters (not bytes) to keep, including ellipsis
    ///
    /// # Examples
    /// ```
    /// use crates_docs::utils::string::truncate_with_ellipsis;
    /// // Basic ASCII truncation
    /// assert_eq!(truncate_with_ellipsis("hello world", 8), "hello...");
    /// assert_eq!(truncate_with_ellipsis("short", 10), "short");
    /// // A string that fits is never replaced by an ellipsis
    /// assert_eq!(truncate_with_ellipsis("hi", 3), "hi");
    /// // UTF-8 safe: works with multi-byte characters
    /// assert_eq!(truncate_with_ellipsis("你好世界", 3), "...");
    /// assert_eq!(truncate_with_ellipsis("你好世界", 4), "你好世界"); // 4 chars fit in 4
    /// assert_eq!(truncate_with_ellipsis("你好世界", 5), "你好世界"); // 4 chars fit in 5
    /// assert_eq!(truncate_with_ellipsis("你好世界你好", 4), "你...");   // 6 chars > 4: keep 1 + "..."
    /// ```
    #[must_use]
    pub fn truncate_with_ellipsis(s: &str, max_len: usize) -> String {
        // Already short enough. The byte length is an upper bound on the
        // character count, so the cheap check comes first; `nth(max_len)` is
        // `None` exactly when the string has at most `max_len` characters.
        if s.len() <= max_len || s.chars().nth(max_len).is_none() {
            return s.to_string();
        }

        // Too long, but no room for any text next to the ellipsis: emit as
        // many dots as the limit allows.
        if max_len <= 3 {
            return ".".repeat(max_len);
        }

        // Keep the first `max_len - 3` characters; slice at that character's
        // byte offset so no intermediate `Vec<char>` is needed.
        let keep = max_len - 3;
        let cut = s.char_indices().nth(keep).map_or(s.len(), |(idx, _)| idx);
        format!("{}...", &s[..cut])
    }

    /// Safely parse number
    ///
    /// Parses `s` as a `T` and returns `default` when parsing fails. The input
    /// is passed to `parse` as-is; this function does no trimming of its own.
    pub fn parse_number<T: std::str::FromStr>(s: &str, default: T) -> T {
        s.parse().unwrap_or(default)
    }

    /// Check if string is empty or blank
    ///
    /// Returns `true` when nothing is left after trimming surrounding whitespace.
    #[must_use]
    pub fn is_blank(s: &str) -> bool {
        s.trim().is_empty()
    }
}
447
448/// Time utilities
449pub mod time {
450    use chrono::{DateTime, Utc};
451
452    /// Get current timestamp (milliseconds)
453    #[must_use]
454    pub fn current_timestamp_ms() -> i64 {
455        Utc::now().timestamp_millis()
456    }
457
458    /// Format datetime
459    #[must_use]
460    pub fn format_datetime(dt: &DateTime<Utc>) -> String {
461        dt.format("%Y-%m-%d %H:%M:%S%.3f").to_string()
462    }
463
464    /// Calculate elapsed time (milliseconds)
465    #[must_use]
466    pub fn elapsed_ms(start: std::time::Instant) -> u128 {
467        start.elapsed().as_millis()
468    }
469}
470
471/// Validation utilities
472pub mod validation {
473    use crate::error::Error;
474
475    /// Validate crate name
476    pub fn validate_crate_name(name: &str) -> Result<(), Error> {
477        if name.is_empty() {
478            return Err(Error::Other("Crate name cannot be empty".to_string()));
479        }
480
481        if name.len() > 100 {
482            return Err(Error::Other("Crate name is too long".to_string()));
483        }
484
485        // Basic validation: only allow letters, digits, underscores, hyphens
486        if !name
487            .chars()
488            .all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '-')
489        {
490            return Err(Error::Other(
491                "Crate name contains invalid characters".to_string(),
492            ));
493        }
494
495        Ok(())
496    }
497
498    /// Validate version number
499    pub fn validate_version(version: &str) -> Result<(), Error> {
500        if version.is_empty() {
501            return Err(Error::Other("Version cannot be empty".to_string()));
502        }
503
504        if version.len() > 50 {
505            return Err(Error::Other("Version is too long".to_string()));
506        }
507
508        // Simple validation: should contain digits and dots
509        if !version.chars().any(|c| c.is_ascii_digit()) {
510            return Err(Error::Other("Version must contain digits".to_string()));
511        }
512
513        Ok(())
514    }
515
516    /// Validate search query
517    pub fn validate_search_query(query: &str) -> Result<(), Error> {
518        if query.is_empty() {
519            return Err(Error::Other("Search query cannot be empty".to_string()));
520        }
521
522        if query.len() > 200 {
523            return Err(Error::Other("Search query is too long".to_string()));
524        }
525
526        Ok(())
527    }
528}
529
530/// Performance monitoring
531pub mod metrics {
532    use std::sync::atomic::{AtomicU64, Ordering};
533    use std::sync::Arc;
534    use std::time::Instant;
535
536    /// Performance counter
537    #[derive(Clone)]
538    pub struct PerformanceCounter {
539        total_requests: Arc<AtomicU64>,
540        successful_requests: Arc<AtomicU64>,
541        failed_requests: Arc<AtomicU64>,
542        total_response_time_ms: Arc<AtomicU64>,
543    }
544
545    impl PerformanceCounter {
546        /// Create a new performance counter
547        #[must_use]
548        pub fn new() -> Self {
549            Self {
550                total_requests: Arc::new(AtomicU64::new(0)),
551                successful_requests: Arc::new(AtomicU64::new(0)),
552                failed_requests: Arc::new(AtomicU64::new(0)),
553                total_response_time_ms: Arc::new(AtomicU64::new(0)),
554            }
555        }
556
557        /// Record request start
558        #[must_use]
559        pub fn record_request_start(&self) -> Instant {
560            self.total_requests.fetch_add(1, Ordering::Relaxed);
561            Instant::now()
562        }
563
564        /// Record request completion
565        #[allow(clippy::cast_possible_truncation)]
566        pub fn record_request_complete(&self, start: Instant, success: bool) {
567            let duration_ms = start.elapsed().as_millis() as u64;
568            self.total_response_time_ms
569                .fetch_add(duration_ms, Ordering::Relaxed);
570
571            if success {
572                self.successful_requests.fetch_add(1, Ordering::Relaxed);
573            } else {
574                self.failed_requests.fetch_add(1, Ordering::Relaxed);
575            }
576        }
577
578        /// Get statistics
579        #[must_use]
580        pub fn get_stats(&self) -> PerformanceStats {
581            let total = self.total_requests.load(Ordering::Relaxed);
582            let success = self.successful_requests.load(Ordering::Relaxed);
583            let failed = self.failed_requests.load(Ordering::Relaxed);
584            let total_time = self.total_response_time_ms.load(Ordering::Relaxed);
585
586            #[allow(clippy::cast_precision_loss)]
587            let avg_response_time = if total > 0 {
588                total_time as f64 / total as f64
589            } else {
590                0.0
591            };
592
593            #[allow(clippy::cast_precision_loss)]
594            let success_rate = if total > 0 {
595                success as f64 / total as f64 * 100.0
596            } else {
597                0.0
598            };
599
600            PerformanceStats {
601                total_requests: total,
602                successful_requests: success,
603                failed_requests: failed,
604                average_response_time_ms: avg_response_time,
605                success_rate_percent: success_rate,
606            }
607        }
608
609        /// Reset counter
610        pub fn reset(&self) {
611            self.total_requests.store(0, Ordering::Relaxed);
612            self.successful_requests.store(0, Ordering::Relaxed);
613            self.failed_requests.store(0, Ordering::Relaxed);
614            self.total_response_time_ms.store(0, Ordering::Relaxed);
615        }
616    }
617
618    impl Default for PerformanceCounter {
619        fn default() -> Self {
620            Self::new()
621        }
622    }
623
624    /// Performance statistics
625    #[derive(Debug, Clone, serde::Serialize)]
626    pub struct PerformanceStats {
627        /// Total requests
628        pub total_requests: u64,
629        /// Successful requests
630        pub successful_requests: u64,
631        /// Failed requests
632        pub failed_requests: u64,
633        /// Average response time (milliseconds)
634        pub average_response_time_ms: f64,
635        /// Success rate (percentage)
636        pub success_rate_percent: f64,
637    }
638}