api_openai/
diagnostics.rs

1//! General diagnostics functionality for monitoring API requests, performance, and errors.
2//!
3//! This module provides comprehensive diagnostics capabilities including:
4//! - Request/response lifecycle tracking
5//! - Performance metrics collection and aggregation
6//! - Error analysis and reporting
7//! - Integration with existing cURL diagnostics
8//! - Low-overhead metrics collection
9
10/// Define a private namespace for all its items.
11mod private
12{
13  use std::
14  {
15    collections ::HashMap,
16    sync ::{ Arc, Mutex },
17    time ::Instant,
18  };
19  use core::time::Duration;
20  use serde::{ Serialize, Deserialize };
21
22  use crate::components::common::ResponseUsage;
23
24  /// Configuration for diagnostics collection behavior
25  ///
26  /// Groups related diagnostic settings to avoid excessive boolean parameters.
27  /// Uses structured configuration pattern following best practices.
28  #[ derive( Debug, Clone, Serialize, Deserialize ) ]
29  pub struct DiagnosticsConfig
30  {
31    /// Core collection settings
32    pub collection : DiagnosticsCollectionConfig,
33    /// Performance tracking settings
34    pub performance : DiagnosticsPerformanceConfig,
35    /// Maximum number of request/response cycles to keep in history
36    pub max_history_size : usize,
37  }
38
39  /// Configuration for what data to collect
40  #[ derive( Debug, Clone, Serialize, Deserialize ) ]
41  #[ allow( clippy::struct_excessive_bools ) ]
42  pub struct DiagnosticsCollectionConfig
43  {
44    /// Whether diagnostics collection is enabled
45    pub enabled : bool,
46    /// Whether to collect request headers (may contain sensitive data)
47    pub request_headers : bool,
48    /// Whether to collect response headers
49    pub response_headers : bool,
50    /// Whether to collect request body (may contain sensitive data)
51    pub request_body : bool,
52    /// Whether to collect response body (may contain sensitive data)
53    pub response_body : bool,
54  }
55
56  /// Configuration for performance metrics
57  #[ derive( Debug, Clone, Serialize, Deserialize ) ]
58  pub struct DiagnosticsPerformanceConfig
59  {
60    /// Whether to track performance metrics
61    pub enabled : bool,
62  }
63
64  impl Default for DiagnosticsConfig
65  {
66    #[ inline ]
67    fn default() -> Self
68    {
69      Self
70      {
71        collection : DiagnosticsCollectionConfig::default(),
72        performance : DiagnosticsPerformanceConfig::default(),
73        max_history_size : 100,
74      }
75    }
76  }
77
78  impl Default for DiagnosticsCollectionConfig
79  {
80    #[ inline ]
81    fn default() -> Self
82    {
83      Self
84      {
85        enabled : true,
86        request_headers : false, // Privacy-conscious default
87        response_headers : false,
88        request_body : false,
89        response_body : false,
90      }
91    }
92  }
93
94  impl Default for DiagnosticsPerformanceConfig
95  {
96    #[ inline ]
97    fn default() -> Self
98    {
99      Self
100      {
101        enabled : true,
102      }
103    }
104  }
105
106  /// Metrics for a single request
107  #[ derive( Debug, Clone ) ]
108  pub struct RequestMetrics
109  {
110    /// Timestamp when request was initiated
111    pub timestamp : Instant,
112    /// HTTP method (GET, POST, etc.)
113    pub method : String,
114    /// API endpoint being called
115    pub endpoint : String,
116    /// Request headers (if collection is enabled)
117    pub headers : Vec< (String, String) >,
118    /// Size of request body in bytes
119    pub body_size : usize,
120    /// User agent string
121    pub user_agent : String,
122  }
123
124  /// Metrics for a single response
125  #[ derive( Debug, Clone ) ]
126  pub struct ResponseMetrics
127  {
128    /// Timestamp when response was received
129    pub timestamp : Instant,
130    /// HTTP status code
131    pub status_code : u16,
132    /// Response headers (if collection is enabled)
133    pub headers : Vec< (String, String) >,
134    /// Size of response body in bytes
135    pub body_size : usize,
136    /// Total response time
137    pub response_time : Duration,
138    /// Token usage information (if available)
139    pub tokens_used : Option< ResponseUsage >,
140  }
141
142  /// Metrics for tracking errors
143  #[ derive( Debug, Clone ) ]
144  pub struct ErrorMetrics
145  {
146    /// Timestamp when error occurred
147    pub timestamp : Instant,
148    /// Type/category of error
149    pub error_type : String,
150    /// HTTP error code (if applicable)
151    pub error_code : Option< u16 >,
152    /// Human-readable error message
153    pub error_message : String,
154    /// Number of retry attempts made
155    pub retry_count : u32,
156    /// Whether this was the final failure (no more retries)
157    pub final_failure : bool,
158  }
159
160  /// Aggregated performance metrics
161  #[ derive( Debug, Clone ) ]
162  pub struct PerformanceMetrics
163  {
164    /// Total number of requests made
165    pub total_requests : u64,
166    /// Number of successful requests
167    pub successful_requests : u64,
168    /// Number of failed requests
169    pub failed_requests : u64,
170    /// Average response time across all requests
171    pub average_response_time : Duration,
172    /// Minimum response time observed
173    pub min_response_time : Duration,
174    /// Maximum response time observed
175    pub max_response_time : Duration,
176    /// Total tokens consumed across all requests
177    pub total_tokens_used : u64,
178    /// Average requests per minute
179    pub requests_per_minute : f64,
180    /// Error rate (failed / total)
181    pub error_rate : f64,
182  }
183
184  /// Combined request/response metrics
185  #[ derive( Debug, Clone ) ]
186  pub struct RequestResponseMetrics
187  {
188    /// Request metrics
189    pub request : RequestMetrics,
190    /// Response metrics (None if request failed before response)
191    pub response : Option< ResponseMetrics >,
192    /// Error metrics (None if request succeeded)
193    pub error : Option< ErrorMetrics >,
194  }
195
196  /// Comprehensive diagnostics report
197  #[ derive( Debug, Clone ) ]
198  pub struct DiagnosticsReport
199  {
200    /// When this report was generated
201    pub generated_at : Instant,
202    /// Time range covered by this report
203    pub time_range : Duration,
204    /// Aggregated performance metrics
205    pub performance : PerformanceMetrics,
206    /// Top endpoints by request count
207    pub top_endpoints : Vec< (String, u64) >,
208    /// Error summary by error type
209    pub error_summary : Vec< (String, u64) >,
210  }
211
212  /// Main diagnostics collector
213  #[ derive( Debug ) ]
214  pub struct DiagnosticsCollector
215  {
216    /// Configuration for diagnostics collection
217    pub config : DiagnosticsConfig,
218    /// History of request/response cycles
219    metrics_history : Arc< Mutex< Vec< RequestResponseMetrics > > >,
220    /// Request counter
221    request_count : Arc< Mutex< u64 > >,
222    /// Error counter
223    error_count : Arc< Mutex< u64 > >,
224    /// Start time for rate calculations
225    start_time : Instant,
226  }
227
228  impl DiagnosticsCollector
229  {
230    /// Create a new diagnostics collector with the given configuration
231    #[ inline ]
232    #[ must_use ]
233    pub fn new( config : DiagnosticsConfig ) -> Self
234    {
235      Self
236      {
237        config,
238        metrics_history : Arc::new( Mutex::new( Vec::new() ) ),
239        request_count : Arc::new( Mutex::new( 0 ) ),
240        error_count : Arc::new( Mutex::new( 0 ) ),
241        start_time : Instant::now(),
242      }
243    }
244
245    /// Record a request being made
246    ///
247    /// # Panics
248    ///
249    /// Panics if the internal mutex is poisoned.
250    #[ inline ]
251    pub fn record_request( &self, metrics : &RequestMetrics )
252    {
253      if !self.config.collection.enabled
254      {
255        return;
256      }
257
258      let mut count = self.request_count.lock().unwrap();
259      *count += 1;
260
261      // Create a new request/response entry
262      let entry = RequestResponseMetrics
263      {
264        request : metrics.clone(),
265        response : None,
266        error : None,
267      };
268
269      let mut history = self.metrics_history.lock().unwrap();
270      history.push( entry );
271
272      // Maintain history size limit
273      if history.len() > self.config.max_history_size
274      {
275        history.remove( 0 );
276      }
277    }
278
279    /// Record a response being received
280    ///
281    /// # Panics
282    ///
283    /// Panics if the internal mutex is poisoned.
284    #[ inline ]
285    pub fn record_response( &self, metrics : &ResponseMetrics )
286    {
287      if !self.config.collection.enabled
288      {
289        return;
290      }
291
292      let mut history = self.metrics_history.lock().unwrap();
293      if let Some( last_entry ) = history.last_mut()
294      {
295        last_entry.response = Some( metrics.clone() );
296      }
297    }
298
299    /// Record an error occurring
300    ///
301    /// # Panics
302    ///
303    /// Panics if the internal mutex is poisoned.
304    #[ inline ]
305    pub fn record_error( &self, metrics : &ErrorMetrics )
306    {
307      if !self.config.collection.enabled
308      {
309        return;
310      }
311
312      let mut error_count = self.error_count.lock().unwrap();
313      *error_count += 1;
314
315      let mut history = self.metrics_history.lock().unwrap();
316      if let Some( last_entry ) = history.last_mut()
317      {
318        last_entry.error = Some( metrics.clone() );
319      }
320    }
321
322    /// Get total number of requests made
323    ///
324    /// # Panics
325    ///
326    /// Panics if the internal mutex is poisoned.
327    #[ inline ]
328    #[ must_use ]
329    pub fn get_request_count( &self ) -> u64
330    {
331      *self.request_count.lock().unwrap()
332    }
333
334    /// Get total number of errors encountered
335    ///
336    /// # Panics
337    ///
338    /// Panics if the internal mutex is poisoned.
339    #[ inline ]
340    #[ must_use ]
341    pub fn get_error_count( &self ) -> u64
342    {
343      *self.error_count.lock().unwrap()
344    }
345
346    /// Get the full metrics history
347    ///
348    /// # Panics
349    ///
350    /// Panics if the internal mutex is poisoned.
351    #[ inline ]
352    #[ must_use ]
353    pub fn get_metrics( &self ) -> Vec< RequestResponseMetrics >
354    {
355      self.metrics_history.lock().unwrap().clone()
356    }
357
358    /// Get error metrics only
359    ///
360    /// # Panics
361    ///
362    /// Panics if the internal mutex is poisoned.
363    #[ inline ]
364    #[ must_use ]
365    pub fn get_error_metrics( &self ) -> Vec< ErrorMetrics >
366    {
367      self.metrics_history
368        .lock()
369        .unwrap()
370        .iter()
371        .filter_map( |entry| entry.error.clone() )
372        .collect()
373    }
374
375    /// Calculate aggregated performance metrics
376    ///
377    /// # Panics
378    ///
379    /// Panics if the internal mutex is poisoned.
380    #[ inline ]
381    #[ must_use ]
382    pub fn get_performance_metrics( &self ) -> PerformanceMetrics
383    {
384      let history = self.metrics_history.lock().unwrap();
385      let total_requests = history.len() as u64;
386
387      if total_requests == 0
388      {
389        return PerformanceMetrics
390        {
391          total_requests : 0,
392          successful_requests : 0,
393          failed_requests : 0,
394          average_response_time : Duration::from_millis( 0 ),
395          min_response_time : Duration::from_millis( 0 ),
396          max_response_time : Duration::from_millis( 0 ),
397          total_tokens_used : 0,
398          requests_per_minute : 0.0,
399          error_rate : 0.0,
400        };
401      }
402
403      let successful_requests = history.iter().filter( |entry| entry.response.is_some() ).count() as u64;
404      let failed_requests = total_requests - successful_requests;
405
406      let response_times : Vec< Duration > = history
407        .iter()
408        .filter_map( |entry| entry.response.as_ref().map( |r| r.response_time ) )
409        .collect();
410
411      let average_response_time = if response_times.is_empty()
412      {
413        Duration::from_millis( 0 )
414      }
415      else
416      {
417        let total_ms : u64 = response_times.iter().map( |d| u64::try_from( d.as_millis() ).unwrap_or( u64::MAX ) ).sum();
418        Duration::from_millis( total_ms / response_times.len() as u64 )
419      };
420
421      let min_response_time = response_times.iter().min().copied().unwrap_or( Duration::from_millis( 0 ) );
422      let max_response_time = response_times.iter().max().copied().unwrap_or( Duration::from_millis( 0 ) );
423
424      let total_tokens_used = history
425        .iter()
426        .filter_map( |entry| entry.response.as_ref().and_then( |r| r.tokens_used.as_ref() ) )
427        .map( |usage| u64::from( usage.total_tokens ) )
428        .sum();
429
430      let elapsed_minutes = self.start_time.elapsed().as_secs_f64() / 60.0;
431      let requests_per_minute = if elapsed_minutes > 0.0
432      {
433        total_requests as f64 / elapsed_minutes
434      }
435      else
436      {
437        0.0
438      };
439
440      let error_rate = if total_requests > 0
441      {
442        failed_requests as f64 / total_requests as f64
443      }
444      else
445      {
446        0.0
447      };
448
449      PerformanceMetrics
450      {
451        total_requests,
452        successful_requests,
453        failed_requests,
454        average_response_time,
455        min_response_time,
456        max_response_time,
457        total_tokens_used,
458        requests_per_minute,
459        error_rate,
460      }
461    }
462
463    /// Generate a comprehensive diagnostics report
464    ///
465    /// # Panics
466    ///
467    /// Panics if the internal mutex is poisoned or if time arithmetic fails.
468    #[ inline ]
469    #[ must_use ]
470    pub fn generate_report( &self, time_range : Duration ) -> DiagnosticsReport
471    {
472      let history = self.metrics_history.lock().unwrap();
473      let cutoff_time = Instant::now().checked_sub( time_range ).unwrap();
474
475      // Filter to time range
476      let recent_metrics : Vec< _ > = history
477        .iter()
478        .filter( |entry| entry.request.timestamp >= cutoff_time )
479        .collect();
480
481      // Count endpoints
482      let mut endpoint_counts : HashMap<  String, u64  > = HashMap::new();
483      for entry in &recent_metrics
484      {
485        *endpoint_counts.entry( entry.request.endpoint.clone() ).or_insert( 0 ) += 1;
486      }
487
488      let mut top_endpoints : Vec< _ > = endpoint_counts.into_iter().collect();
489      top_endpoints.sort_by( |a, b| b.1.cmp( &a.1 ) );
490
491      // Count error types
492      let mut error_counts : HashMap<  String, u64  > = HashMap::new();
493      for entry in &recent_metrics
494      {
495        if let Some( error ) = &entry.error
496        {
497          *error_counts.entry( error.error_type.clone() ).or_insert( 0 ) += 1;
498        }
499      }
500
501      let mut error_summary : Vec< _ > = error_counts.into_iter().collect();
502      error_summary.sort_by( |a, b| b.1.cmp( &a.1 ) );
503
504      DiagnosticsReport
505      {
506        generated_at : Instant::now(),
507        time_range,
508        performance : self.get_performance_metrics(),
509        top_endpoints,
510        error_summary,
511      }
512    }
513
514    /// Estimate memory usage of the diagnostics collector
515    ///
516    /// # Panics
517    ///
518    /// Panics if the internal mutex is poisoned.
519    #[ inline ]
520    #[ must_use ]
521    pub fn estimate_memory_usage( &self ) -> usize
522    {
523      let history = self.metrics_history.lock().unwrap();
524      // Rough estimate : each entry is about 1KB
525      history.len() * 1024
526    }
527  }
528}
529
// Public surface of this module: every item listed below is defined in the
// `private` module above and is exported through the `exposed use` list.
crate ::mod_interface!
{
  exposed use
  {
    // Configuration types.
    DiagnosticsConfig,
    DiagnosticsCollectionConfig,
    DiagnosticsPerformanceConfig,
    // The collector itself.
    DiagnosticsCollector,
    // Per-request, per-response and per-error records.
    RequestMetrics,
    ResponseMetrics,
    ErrorMetrics,
    // Aggregates, history entries and reports.
    PerformanceMetrics,
    RequestResponseMetrics,
    DiagnosticsReport,
  };
}