Skip to main content

crates_docs/utils/
mod.rs

1//! Utility functions module
2
3use crate::error::{Error, Result};
4use reqwest::Client;
5use reqwest_middleware::ClientBuilder;
6use reqwest_retry::{policies::ExponentialBackoff, RetryTransientMiddleware};
7use std::sync::{Arc, Mutex, OnceLock};
8use std::time::Duration;
9use tokio::sync::Semaphore;
10
/// Process-wide HTTP client shared by the helpers in this module.
///
/// The cell starts empty and is filled by `init_global_http_client` (directly,
/// or indirectly through `get_or_init_global_http_client`). `OnceLock` keeps the
/// first value that is stored; later `set` calls are rejected and the stored
/// client is never replaced.
///
/// The client is wrapped in an `Arc` so that every caller receives a cheap
/// handle to the same instance instead of building its own client.
static GLOBAL_HTTP_CLIENT: OnceLock<Arc<reqwest_middleware::ClientWithMiddleware>> =
    OnceLock::new();
18
/// Message of a failed global-client initialization attempt, if any.
///
/// `init_global_http_client` stores the failure message here when building the
/// client fails, and checks this slot before attempting a build: once a message
/// is present, further calls return an error instead of retrying.
static INIT_ERROR: Mutex<Option<String>> = Mutex::new(None);
22
23/// Initialize the global HTTP client singleton
24///
25/// # Arguments
26///
27/// * `config` - Performance configuration for connection pool settings
28///
29/// # Errors
30///
31/// Returns an error if HTTP client creation fails
32///
33/// # Note
34///
35/// This function is thread-safe and ensures only one thread performs the
36/// expensive client initialization (including TLS setup). Subsequent calls
37/// will return Ok(()) if initialization succeeded, or the original error
38/// if initialization previously failed.
39pub fn init_global_http_client(config: &crate::config::PerformanceConfig) -> Result<()> {
40    // Fast path: already initialized
41    if GLOBAL_HTTP_CLIENT.get().is_some() {
42        return Ok(());
43    }
44
45    // Check if previous initialization failed
46    {
47        let error_guard = INIT_ERROR.lock().map_err(|e| {
48            Error::initialization(
49                "global_http_client",
50                format!("Failed to lock init error mutex: {e}"),
51            )
52        })?;
53        if let Some(ref err_msg) = *error_guard {
54            return Err(Error::initialization(
55                "global_http_client",
56                format!("Previous initialization failed: {err_msg}"),
57            ));
58        }
59    }
60
61    // Slow path: try to initialize
62    let client_result = create_http_client_from_config(config).build();
63
64    match client_result {
65        Ok(client) => {
66            let client_arc = Arc::new(client);
67            // set() returns Err if already initialized, which is fine
68            let _ = GLOBAL_HTTP_CLIENT.set(client_arc);
69            Ok(())
70        }
71        Err(e) => {
72            let err_msg = format!("Failed to create global HTTP client: {e}");
73            if let Ok(mut error_guard) = INIT_ERROR.lock() {
74                *error_guard = Some(err_msg.clone());
75            }
76            Err(Error::initialization("global_http_client", err_msg))
77        }
78    }
79}
80
81/// Get the global HTTP client singleton
82///
83/// # Panics
84///
85/// Panics if the global HTTP client has not been initialized.
86/// Call `init_global_http_client()` before using this function.
87#[must_use]
88pub fn get_global_http_client() -> Arc<reqwest_middleware::ClientWithMiddleware> {
89    GLOBAL_HTTP_CLIENT
90        .get()
91        .cloned()
92        .expect("Global HTTP client not initialized. Call init_global_http_client() first.")
93}
94
95/// Get or initialize the global HTTP client with default config
96///
97/// This is a convenience function for use cases where the client
98/// might not be explicitly initialized. It uses default performance config.
99///
100/// # Errors
101///
102/// Returns an error if HTTP client creation fails (e.g., TLS initialization error).
103/// This function is thread-safe and ensures only one thread performs initialization.
104pub fn get_or_init_global_http_client() -> Result<Arc<reqwest_middleware::ClientWithMiddleware>> {
105    // Fast path: already initialized
106    if let Some(client) = GLOBAL_HTTP_CLIENT.get() {
107        return Ok(client.clone());
108    }
109
110    // Use init_global_http_client with default config for thread-safe initialization
111    let default_config = crate::config::PerformanceConfig::default();
112    init_global_http_client(&default_config)?;
113
114    // Now it should be initialized
115    GLOBAL_HTTP_CLIENT.get().cloned().ok_or_else(|| {
116        Error::initialization(
117            "global_http_client",
118            "HTTP client initialization failed unexpectedly".to_string(),
119        )
120    })
121}
122
/// HTTP client builder with retry support
///
/// Collects the settings used to construct either a
/// `reqwest_middleware::ClientWithMiddleware` (with automatic retries for
/// transient failures) or a plain `reqwest::Client`.
///
/// The builder is `Clone` so a configured instance can be reused to build more
/// than one client, and `Debug` so its settings can be logged.
#[derive(Debug, Clone)]
pub struct HttpClientBuilder {
    /// Overall request timeout.
    timeout: Duration,
    /// Timeout for establishing a connection.
    connect_timeout: Duration,
    /// Read timeout configured through the `read_timeout` setter.
    read_timeout: Duration,
    /// Maximum number of idle pooled connections kept per host.
    pool_max_idle_per_host: usize,
    /// How long an idle pooled connection is kept before being dropped.
    pool_idle_timeout: Duration,
    /// Value sent in the `User-Agent` header.
    user_agent: String,
    /// Whether gzip response decompression stays enabled.
    enable_gzip: bool,
    /// Whether brotli response decompression stays enabled.
    enable_brotli: bool,
    /// Maximum number of retries for the retry policy.
    max_retries: u32,
    /// Lower bound of the exponential backoff delay.
    retry_initial_delay: Duration,
    /// Upper bound of the exponential backoff delay.
    retry_max_delay: Duration,
}
140
141impl Default for HttpClientBuilder {
142    fn default() -> Self {
143        Self {
144            timeout: Duration::from_secs(30),
145            connect_timeout: Duration::from_secs(10),
146            read_timeout: Duration::from_secs(30),
147            pool_max_idle_per_host: 10,
148            pool_idle_timeout: Duration::from_secs(90),
149            user_agent: format!("CratesDocsMCP/{}", crate::VERSION),
150            enable_gzip: true,
151            enable_brotli: true,
152            max_retries: 3,
153            retry_initial_delay: Duration::from_millis(100),
154            retry_max_delay: Duration::from_secs(10),
155        }
156    }
157}
158
159impl HttpClientBuilder {
160    /// Create a new HTTP client builder
161    #[must_use]
162    pub fn new() -> Self {
163        Self::default()
164    }
165
166    /// Set request timeout
167    #[must_use]
168    pub fn timeout(mut self, timeout: Duration) -> Self {
169        self.timeout = timeout;
170        self
171    }
172
173    /// Set connection timeout
174    #[must_use]
175    pub fn connect_timeout(mut self, connect_timeout: Duration) -> Self {
176        self.connect_timeout = connect_timeout;
177        self
178    }
179
180    /// Set read timeout
181    #[must_use]
182    pub fn read_timeout(mut self, read_timeout: Duration) -> Self {
183        self.read_timeout = read_timeout;
184        self
185    }
186
187    /// Set connection pool size
188    #[must_use]
189    pub fn pool_max_idle_per_host(mut self, max_idle: usize) -> Self {
190        self.pool_max_idle_per_host = max_idle;
191        self
192    }
193
194    /// Set pool idle timeout
195    #[must_use]
196    pub fn pool_idle_timeout(mut self, idle_timeout: Duration) -> Self {
197        self.pool_idle_timeout = idle_timeout;
198        self
199    }
200
201    /// Set User-Agent
202    #[must_use]
203    pub fn user_agent(mut self, user_agent: String) -> Self {
204        self.user_agent = user_agent;
205        self
206    }
207
208    /// Enable/disable Gzip compression
209    #[must_use]
210    pub fn enable_gzip(mut self, enable: bool) -> Self {
211        self.enable_gzip = enable;
212        self
213    }
214
215    /// Enable/disable Brotli compression
216    #[must_use]
217    pub fn enable_brotli(mut self, enable: bool) -> Self {
218        self.enable_brotli = enable;
219        self
220    }
221
222    /// Set max retry attempts
223    #[must_use]
224    pub fn max_retries(mut self, max_retries: u32) -> Self {
225        self.max_retries = max_retries;
226        self
227    }
228
229    /// Set retry initial delay
230    #[must_use]
231    pub fn retry_initial_delay(mut self, delay: Duration) -> Self {
232        self.retry_initial_delay = delay;
233        self
234    }
235
236    /// Set retry max delay
237    #[must_use]
238    pub fn retry_max_delay(mut self, delay: Duration) -> Self {
239        self.retry_max_delay = delay;
240        self
241    }
242
243    /// Build HTTP client with middleware chain
244    ///
245    /// This method builds a `reqwest_middleware::ClientWithMiddleware` that includes
246    /// automatic retry functionality using exponential backoff for transient failures.
247    ///
248    /// # Returns
249    ///
250    /// Returns a `ClientWithMiddleware` that can be used like a regular `reqwest::Client`
251    /// but with automatic retry on transient errors.
252    pub fn build(self) -> Result<reqwest_middleware::ClientWithMiddleware> {
253        let mut builder = Client::builder()
254            .timeout(self.timeout)
255            .connect_timeout(self.connect_timeout)
256            .pool_max_idle_per_host(self.pool_max_idle_per_host)
257            .pool_idle_timeout(self.pool_idle_timeout)
258            .user_agent(&self.user_agent);
259
260        // reqwest 0.13 enables gzip and brotli by default
261        // To disable, use .no_gzip() and .no_brotli()
262        if !self.enable_gzip {
263            builder = builder.no_gzip();
264        }
265
266        if !self.enable_brotli {
267            builder = builder.no_brotli();
268        }
269
270        let client = builder
271            .build()
272            .map_err(|e| Error::http_request("BUILD", "client", 0, e.to_string()))?;
273
274        // Create retry policy with exponential backoff
275        let retry_policy = ExponentialBackoff::builder()
276            .retry_bounds(self.retry_initial_delay, self.retry_max_delay)
277            .build_with_max_retries(self.max_retries);
278
279        // Build client with retry middleware
280        Ok(ClientBuilder::new(client)
281            .with(RetryTransientMiddleware::new_with_policy(retry_policy))
282            .build())
283    }
284
285    /// Build HTTP client without retry support
286    ///
287    /// This method returns a plain `reqwest::Client` without any middleware.
288    /// Use [`build`](Self::build) for retry support.
289    pub fn build_plain(self) -> Result<Client> {
290        let mut builder = Client::builder()
291            .timeout(self.timeout)
292            .connect_timeout(self.connect_timeout)
293            .pool_max_idle_per_host(self.pool_max_idle_per_host)
294            .pool_idle_timeout(self.pool_idle_timeout)
295            .user_agent(&self.user_agent);
296
297        if !self.enable_gzip {
298            builder = builder.no_gzip();
299        }
300
301        if !self.enable_brotli {
302            builder = builder.no_brotli();
303        }
304
305        builder
306            .build()
307            .map_err(|e| Error::http_request("BUILD", "client", 0, e.to_string()))
308    }
309}
310
311/// Create HTTP client builder from performance config
312///
313/// This function creates an `HttpClientBuilder` pre-configured with settings
314/// from the provided `PerformanceConfig`. The resulting client will include
315/// automatic retry functionality.
316#[must_use]
317pub fn create_http_client_from_config(
318    config: &crate::config::PerformanceConfig,
319) -> HttpClientBuilder {
320    HttpClientBuilder::new()
321        .timeout(Duration::from_secs(config.http_client_timeout_secs))
322        .connect_timeout(Duration::from_secs(config.http_client_connect_timeout_secs))
323        .read_timeout(Duration::from_secs(config.http_client_read_timeout_secs))
324        .pool_max_idle_per_host(config.http_client_pool_size)
325        .pool_idle_timeout(Duration::from_secs(
326            config.http_client_pool_idle_timeout_secs,
327        ))
328        .max_retries(config.http_client_max_retries)
329        .retry_initial_delay(Duration::from_millis(
330            config.http_client_retry_initial_delay_ms,
331        ))
332        .retry_max_delay(Duration::from_millis(config.http_client_retry_max_delay_ms))
333}
334
335/// Rate limiter
336pub struct RateLimiter {
337    semaphore: Arc<Semaphore>,
338    max_permits: usize,
339}
340
341impl RateLimiter {
342    /// Create a new rate limiter
343    #[must_use]
344    pub fn new(max_permits: usize) -> Self {
345        Self {
346            semaphore: Arc::new(Semaphore::new(max_permits)),
347            max_permits,
348        }
349    }
350
351    /// Acquire permit (blocks until available)
352    pub async fn acquire(&self) -> Result<tokio::sync::SemaphorePermit<'_>> {
353        self.semaphore
354            .acquire()
355            .await
356            .map_err(|e| Error::Other(format!("Failed to acquire rate limit permit: {e}")))
357    }
358
359    /// Try to acquire permit (non-blocking)
360    #[must_use]
361    pub fn try_acquire(&self) -> Option<tokio::sync::SemaphorePermit<'_>> {
362        self.semaphore.try_acquire().ok()
363    }
364
365    /// Get current number of available permits
366    #[must_use]
367    pub fn available_permits(&self) -> usize {
368        self.semaphore.available_permits()
369    }
370
371    /// Get maximum number of permits
372    #[must_use]
373    pub fn max_permits(&self) -> usize {
374        self.max_permits
375    }
376}
377
378/// Response compression utilities
379pub mod compression {
380    use crate::error::{Error, Result};
381    use flate2::write::GzEncoder;
382    use flate2::Compression;
383    use std::io::Write;
384
385    /// Compress data (Gzip)
386    pub fn gzip_compress(data: &[u8]) -> Result<Vec<u8>> {
387        let mut encoder = GzEncoder::new(Vec::new(), Compression::default());
388        encoder
389            .write_all(data)
390            .map_err(|e| Error::Other(format!("Gzip compression failed: {e}")))?;
391        encoder
392            .finish()
393            .map_err(|e| Error::Other(format!("Gzip compression finalize failed: {e}")))
394    }
395
396    /// Decompress data (Gzip)
397    pub fn gzip_decompress(data: &[u8]) -> Result<Vec<u8>> {
398        let mut decoder = flate2::read::GzDecoder::new(data);
399        let mut decompressed = Vec::new();
400        std::io::Read::read_to_end(&mut decoder, &mut decompressed)
401            .map_err(|e| Error::Other(format!("Gzip decompression failed: {e}")))?;
402        Ok(decompressed)
403    }
404}
405
/// String utilities
pub mod string {
    /// Truncate string and add ellipsis (UTF-8 safe)
    ///
    /// A string that already fits within `max_len` characters is returned
    /// unchanged. Otherwise the result is exactly `max_len` characters long:
    /// the first `max_len - 3` characters of `s` followed by `"..."`. When
    /// `max_len` is smaller than 3 the ellipsis itself is shortened so the
    /// result never exceeds `max_len`.
    ///
    /// # Arguments
    /// * `s` - The string to truncate
    /// * `max_len` - Maximum number of characters (not bytes) to keep, including ellipsis
    ///
    /// # Examples
    /// ```
    /// use crates_docs::utils::string::truncate_with_ellipsis;
    /// // Basic ASCII truncation
    /// assert_eq!(truncate_with_ellipsis("hello world", 8), "hello...");
    /// assert_eq!(truncate_with_ellipsis("short", 10), "short");
    /// // Strings that already fit are never altered, even for tiny limits
    /// assert_eq!(truncate_with_ellipsis("hi", 3), "hi");
    /// assert_eq!(truncate_with_ellipsis("", 0), "");
    /// // The result never exceeds `max_len` characters
    /// assert_eq!(truncate_with_ellipsis("hello", 2), "..");
    /// // UTF-8 safe: works with multi-byte characters
    /// assert_eq!(truncate_with_ellipsis("你好世界", 3), "...");
    /// assert_eq!(truncate_with_ellipsis("你好世界", 4), "你好世界"); // 4 chars <= max_len, no truncation
    /// assert_eq!(truncate_with_ellipsis("你好世界", 5), "你好世界"); // 4 chars <= max_len, no truncation
    /// assert_eq!(truncate_with_ellipsis("你好世界你好", 4), "你...");   // 6 chars > max_len, truncate
    /// ```
    #[must_use]
    pub fn truncate_with_ellipsis(s: &str, max_len: usize) -> String {
        const ELLIPSIS: &str = "...";

        // `nth(max_len)` yields a character only if `s` has more than `max_len`
        // characters, so `None` means the string already fits.
        if s.chars().nth(max_len).is_none() {
            return s.to_string();
        }

        // No room for the full ellipsis: return as much of it as fits.
        // ELLIPSIS is ASCII, so slicing by byte index is safe.
        let Some(keep) = max_len.checked_sub(ELLIPSIS.len()) else {
            return ELLIPSIS[..max_len].to_string();
        };

        // Byte offset of the first character that is dropped; always on a
        // character boundary because it comes from `char_indices`.
        let cut = s.char_indices().nth(keep).map_or(s.len(), |(idx, _)| idx);

        let mut truncated = String::with_capacity(cut + ELLIPSIS.len());
        truncated.push_str(&s[..cut]);
        truncated.push_str(ELLIPSIS);
        truncated
    }

    /// Safely parse number
    ///
    /// Parses `s` as a `T` via `FromStr` and returns `default` when parsing
    /// fails. The input is parsed as given; it is not trimmed first.
    pub fn parse_number<T: std::str::FromStr>(s: &str, default: T) -> T {
        s.parse().unwrap_or(default)
    }

    /// Check if string is empty or blank
    ///
    /// Returns `true` when `s` is empty or consists only of whitespace.
    #[must_use]
    pub fn is_blank(s: &str) -> bool {
        s.trim().is_empty()
    }
}
457
458/// Time utilities
459pub mod time {
460    use chrono::{DateTime, Utc};
461
462    /// Get current timestamp (milliseconds)
463    #[must_use]
464    pub fn current_timestamp_ms() -> i64 {
465        Utc::now().timestamp_millis()
466    }
467
468    /// Format datetime
469    #[must_use]
470    pub fn format_datetime(dt: &DateTime<Utc>) -> String {
471        dt.format("%Y-%m-%d %H:%M:%S%.3f").to_string()
472    }
473
474    /// Calculate elapsed time (milliseconds)
475    #[must_use]
476    pub fn elapsed_ms(start: std::time::Instant) -> u128 {
477        start.elapsed().as_millis()
478    }
479}
480
481/// Validation utilities
482pub mod validation {
483    use crate::error::Error;
484
485    /// Validate crate name
486    pub fn validate_crate_name(name: &str) -> Result<(), Error> {
487        if name.is_empty() {
488            return Err(Error::Other("Crate name cannot be empty".to_string()));
489        }
490
491        if name.len() > 100 {
492            return Err(Error::Other("Crate name is too long".to_string()));
493        }
494
495        // Basic validation: only allow letters, digits, underscores, hyphens
496        if !name
497            .chars()
498            .all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '-')
499        {
500            return Err(Error::Other(
501                "Crate name contains invalid characters".to_string(),
502            ));
503        }
504
505        Ok(())
506    }
507
508    /// Validate version number
509    pub fn validate_version(version: &str) -> Result<(), Error> {
510        if version.is_empty() {
511            return Err(Error::Other("Version cannot be empty".to_string()));
512        }
513
514        if version.len() > 50 {
515            return Err(Error::Other("Version is too long".to_string()));
516        }
517
518        // Simple validation: should contain digits and dots
519        if !version.chars().any(|c| c.is_ascii_digit()) {
520            return Err(Error::Other("Version must contain digits".to_string()));
521        }
522
523        Ok(())
524    }
525
526    /// Validate search query
527    pub fn validate_search_query(query: &str) -> Result<(), Error> {
528        if query.is_empty() {
529            return Err(Error::Other("Search query cannot be empty".to_string()));
530        }
531
532        if query.len() > 200 {
533            return Err(Error::Other("Search query is too long".to_string()));
534        }
535
536        Ok(())
537    }
538}
539
540/// Performance monitoring
541pub mod metrics {
542    use std::sync::atomic::{AtomicU64, Ordering};
543    use std::sync::Arc;
544    use std::time::Instant;
545
546    /// Performance counter
547    #[derive(Clone)]
548    pub struct PerformanceCounter {
549        total_requests: Arc<AtomicU64>,
550        successful_requests: Arc<AtomicU64>,
551        failed_requests: Arc<AtomicU64>,
552        total_response_time_ms: Arc<AtomicU64>,
553    }
554
555    impl PerformanceCounter {
556        /// Create a new performance counter
557        #[must_use]
558        pub fn new() -> Self {
559            Self {
560                total_requests: Arc::new(AtomicU64::new(0)),
561                successful_requests: Arc::new(AtomicU64::new(0)),
562                failed_requests: Arc::new(AtomicU64::new(0)),
563                total_response_time_ms: Arc::new(AtomicU64::new(0)),
564            }
565        }
566
567        /// Record request start
568        #[must_use]
569        pub fn record_request_start(&self) -> Instant {
570            self.total_requests.fetch_add(1, Ordering::Relaxed);
571            Instant::now()
572        }
573
574        /// Record request completion
575        #[allow(clippy::cast_possible_truncation)]
576        pub fn record_request_complete(&self, start: Instant, success: bool) {
577            let duration_ms = start.elapsed().as_millis() as u64;
578            self.total_response_time_ms
579                .fetch_add(duration_ms, Ordering::Relaxed);
580
581            if success {
582                self.successful_requests.fetch_add(1, Ordering::Relaxed);
583            } else {
584                self.failed_requests.fetch_add(1, Ordering::Relaxed);
585            }
586        }
587
588        /// Get statistics
589        #[must_use]
590        pub fn get_stats(&self) -> PerformanceStats {
591            let total = self.total_requests.load(Ordering::Relaxed);
592            let success = self.successful_requests.load(Ordering::Relaxed);
593            let failed = self.failed_requests.load(Ordering::Relaxed);
594            let total_time = self.total_response_time_ms.load(Ordering::Relaxed);
595
596            #[allow(clippy::cast_precision_loss)]
597            let avg_response_time = if total > 0 {
598                total_time as f64 / total as f64
599            } else {
600                0.0
601            };
602
603            #[allow(clippy::cast_precision_loss)]
604            let success_rate = if total > 0 {
605                success as f64 / total as f64 * 100.0
606            } else {
607                0.0
608            };
609
610            PerformanceStats {
611                total_requests: total,
612                successful_requests: success,
613                failed_requests: failed,
614                average_response_time_ms: avg_response_time,
615                success_rate_percent: success_rate,
616            }
617        }
618
619        /// Reset counter
620        pub fn reset(&self) {
621            self.total_requests.store(0, Ordering::Relaxed);
622            self.successful_requests.store(0, Ordering::Relaxed);
623            self.failed_requests.store(0, Ordering::Relaxed);
624            self.total_response_time_ms.store(0, Ordering::Relaxed);
625        }
626    }
627
628    impl Default for PerformanceCounter {
629        fn default() -> Self {
630            Self::new()
631        }
632    }
633
634    /// Performance statistics
635    #[derive(Debug, Clone, serde::Serialize)]
636    pub struct PerformanceStats {
637        /// Total requests
638        pub total_requests: u64,
639        /// Successful requests
640        pub successful_requests: u64,
641        /// Failed requests
642        pub failed_requests: u64,
643        /// Average response time (milliseconds)
644        pub average_response_time_ms: f64,
645        /// Success rate (percentage)
646        pub success_rate_percent: f64,
647    }
648}