trustformers_debug/
llm_debugging.rs

//! Large Language Model (LLM) Specific Debugging
//!
//! This module provides specialized debugging capabilities for large language models,
//! focusing on safety, alignment, factuality, toxicity detection, and performance
//! characteristics specific to modern LLMs.

7use anyhow::Result;
8// use scirs2_core::ndarray::*; // SciRS2 Integration Policy - was: use ndarray::{Array, ArrayD, IxDyn};
9use serde::{Deserialize, Serialize};
10use std::collections::{HashMap, HashSet};
11use std::time::{Duration, Instant};
12
/// Main LLM debugging framework
///
/// Aggregates the specialized analyzers (safety, factuality, alignment,
/// hallucination, bias, performance, conversation) behind a single entry
/// point; which analyzers actually run is gated by `LLMDebugConfig` flags.
#[derive(Debug)]
pub struct LLMDebugger {
    /// Feature flags and thresholds shared by all analyzers.
    config: LLMDebugConfig,
    /// Detects toxic or harmful content in responses.
    safety_analyzer: SafetyAnalyzer,
    /// Verifies factual accuracy of responses.
    factuality_checker: FactualityChecker,
    /// Tracks adherence to alignment objectives.
    alignment_monitor: AlignmentMonitor,
    /// Flags likely fabricated or inconsistent content.
    hallucination_detector: HallucinationDetector,
    /// Screens responses for demographic and other biases.
    bias_detector: BiasDetector,
    /// Collects LLM-specific performance metrics.
    performance_profiler: LLMPerformanceProfiler,
    /// Analyzes multi-turn conversation quality.
    conversation_analyzer: ConversationAnalyzer,
}

/// Configuration for LLM debugging
///
/// Each `enable_*` flag gates one analyzer in [`LLMDebugger::analyze_response`];
/// disabled analyses yield `None` in the resulting report. Score thresholds
/// are expressed in the 0.0..=1.0 range.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LLMDebugConfig {
    /// Enable safety analysis (toxicity, harmful content)
    pub enable_safety_analysis: bool,
    /// Enable factuality checking
    pub enable_factuality_checking: bool,
    /// Enable alignment monitoring
    pub enable_alignment_monitoring: bool,
    /// Enable hallucination detection
    pub enable_hallucination_detection: bool,
    /// Enable bias detection
    pub enable_bias_detection: bool,
    /// Enable performance profiling for LLM-specific metrics
    pub enable_llm_performance_profiling: bool,
    /// Enable conversation flow analysis
    pub enable_conversation_analysis: bool,
    /// Threshold for safety score (0.0 to 1.0); scores below it trigger recommendations
    pub safety_threshold: f32,
    /// Threshold for factuality score (0.0 to 1.0); scores below it trigger recommendations
    pub factuality_threshold: f32,
    /// Maximum conversation length to analyze
    pub max_conversation_length: usize,
    /// Sampling rate for expensive analyses (1.0 = analyze everything)
    pub analysis_sampling_rate: f32,
}

impl Default for LLMDebugConfig {
    /// Defaults: every analysis enabled, full sampling (1.0), and moderately
    /// strict thresholds (safety 0.8, factuality 0.7).
    fn default() -> Self {
        Self {
            enable_safety_analysis: true,
            enable_factuality_checking: true,
            enable_alignment_monitoring: true,
            enable_hallucination_detection: true,
            enable_bias_detection: true,
            enable_llm_performance_profiling: true,
            enable_conversation_analysis: true,
            safety_threshold: 0.8,
            factuality_threshold: 0.7,
            max_conversation_length: 100,
            analysis_sampling_rate: 1.0,
        }
    }
}

/// Safety analyzer for detecting harmful, toxic, or inappropriate content
#[derive(Debug)]
#[allow(dead_code)]
pub struct SafetyAnalyzer {
    /// Known toxic phrases to match against (currently unpopulated).
    #[allow(dead_code)]
    toxic_patterns: HashSet<String>,
    /// Harm categories this analyzer screens for.
    harm_categories: Vec<HarmCategory>,
    /// Running safety statistics across analyzed responses.
    safety_metrics: SafetyMetrics,
}

/// Categories of potential harm in LLM outputs
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum HarmCategory {
    Toxicity,       // Toxic, offensive, or inappropriate language
    Violence,       // Violence or threats
    SelfHarm,       // Self-harm or suicide-related content
    Harassment,     // Harassment or bullying
    HateSpeech,     // Hate speech or discrimination
    Sexual,         // Sexual or adult content
    Privacy,        // Privacy violations or doxxing
    Misinformation, // Misinformation or conspiracy theories
    Manipulation,   // Social manipulation or deception
    Illegal,        // Illegal activities or advice
}

/// Safety metrics for tracking harmful content
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SafetyMetrics {
    /// Aggregate safety score in 0.0..=1.0; higher is safer.
    pub overall_safety_score: f32,
    /// Per-harm-category safety scores.
    pub harm_category_scores: HashMap<HarmCategory, f32>,
    /// Number of responses that fell below the flagging threshold.
    pub flagged_responses: usize,
    /// Total responses run through the analyzer.
    pub total_responses_analyzed: usize,
    /// Mean per-response safety score.
    pub average_response_safety: f32,
    /// Direction safety scores are moving over time.
    pub safety_trend: SafetyTrend,
}

/// Trend in safety scores over time
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum SafetyTrend {
    Improving,
    Stable,
    Degrading,
    Volatile,
}

/// Factuality checker for verifying the accuracy of LLM outputs
#[derive(Debug)]
pub struct FactualityChecker {
    /// Names of reference databases consulted for verification.
    #[allow(dead_code)]
    fact_databases: Vec<String>,
    /// Phrases that signal hedged or uncertain statements (e.g. "possibly").
    uncertainty_indicators: HashSet<String>,
    /// Running factual-accuracy statistics.
    factuality_metrics: FactualityMetrics,
}

/// Metrics for tracking factual accuracy
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FactualityMetrics {
    /// Aggregate factuality score in 0.0..=1.0.
    pub overall_factuality_score: f32,
    /// Count of claims confirmed against reference data.
    pub verified_facts: usize,
    /// Count of claims that could not be verified.
    pub unverified_claims: usize,
    /// Count of statements conflicting with known information.
    pub conflicting_information: usize,
    /// Count of hedged/uncertain expressions observed.
    pub uncertainty_expressions: usize,
    /// Topics where the model exhibited missing knowledge.
    pub knowledge_gaps: Vec<String>,
    /// Distribution of per-claim confidence values.
    pub confidence_distribution: Vec<f32>,
}

/// Alignment monitor for ensuring LLM outputs align with intended behavior
#[allow(dead_code)]
#[derive(Debug)]
pub struct AlignmentMonitor {
    /// Objectives the model is expected to uphold.
    #[allow(dead_code)]
    alignment_objectives: Vec<AlignmentObjective>,
    /// Running alignment measurements.
    alignment_metrics: AlignmentMetrics,
    /// Latest aggregate value-alignment score.
    value_alignment_score: f32,
}

/// Types of alignment objectives for LLMs
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum AlignmentObjective {
    Helpfulness,    // Be helpful and informative
    Harmlessness,   // Avoid causing harm
    Honesty,        // Be truthful and transparent
    Fairness,       // Treat all users fairly
    Privacy,        // Respect privacy and confidentiality
    Transparency,   // Be clear about limitations
    Consistency,    // Maintain consistent behavior
    Responsibility, // Take appropriate responsibility for outputs
}

/// Metrics for alignment monitoring
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AlignmentMetrics {
    /// Per-objective alignment scores.
    pub objective_scores: HashMap<AlignmentObjective, f32>,
    /// Aggregate alignment score in 0.0..=1.0.
    pub overall_alignment_score: f32,
    /// Count of observed alignment violations.
    pub alignment_violations: usize,
    /// How consistently the model reflects its stated values.
    pub value_consistency_score: f32,
    /// Magnitude of behavioral drift from the baseline.
    pub behavioral_drift: f32,
    /// Direction alignment scores are moving over time.
    pub alignment_trend: AlignmentTrend,
}

/// Trend in alignment scores over time
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum AlignmentTrend {
    Improving,
    Stable,
    Degrading,
    Inconsistent,
}

180#[allow(dead_code)]
181/// Hallucination detector for identifying false or fabricated information
182#[derive(Debug)]
183pub struct HallucinationDetector {
184    #[allow(dead_code)]
185    confidence_thresholds: HashMap<String, f32>,
186    consistency_checker: ConsistencyChecker,
187    hallucination_metrics: HallucinationMetrics,
188}
189
190/// Metrics for hallucination detection
191#[derive(Debug, Clone, Serialize, Deserialize)]
192pub struct HallucinationMetrics {
193    pub hallucination_rate: f32,
194    pub confidence_accuracy_correlation: f32,
195    pub factual_consistency_score: f32,
196    pub internal_consistency_score: f32,
197    pub source_attribution_accuracy: f32,
198    pub detected_fabrications: usize,
199    pub uncertain_responses: usize,
200}
201
202/// Consistency checker for internal consistency in responses
203#[derive(Debug)]
204pub struct ConsistencyChecker {
205    previous_responses: Vec<String>,
206    #[allow(dead_code)]
207    consistency_cache: HashMap<String, f32>,
208}
209#[allow(dead_code)]
210
211/// Bias detector for identifying various forms of bias in LLM outputs
212#[derive(Debug)]
213pub struct BiasDetector {
214    #[allow(dead_code)]
215    bias_categories: Vec<BiasCategory>,
216    demographic_groups: Vec<String>,
217    bias_metrics: BiasMetrics,
218}
219
220/// Types of bias to detect in LLM outputs
221#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
222pub enum BiasCategory {
223    Gender,        // Gender-based bias
224    Race,          // Racial or ethnic bias
225    Religion,      // Religious bias
226    Age,           // Age-based bias
227    SocioEconomic, // Socioeconomic bias
228    Geographic,    // Geographic or cultural bias
229    Political,     // Political bias
230    Linguistic,    // Language or accent bias
231    Ability,       // Disability or ability bias
232    Appearance,    // Physical appearance bias
233}
234
235/// Metrics for bias detection
236#[derive(Debug, Clone, Serialize, Deserialize)]
237pub struct BiasMetrics {
238    pub overall_bias_score: f32,
239    pub bias_category_scores: HashMap<BiasCategory, f32>,
240    pub demographic_fairness: HashMap<String, f32>,
241    pub representation_bias: f32,
242    pub stereotype_propagation: f32,
243    pub bias_amplification: f32,
244    pub fairness_violations: usize,
245}
246
/// Performance profiler specific to LLM characteristics
#[derive(Debug)]
pub struct LLMPerformanceProfiler {
    /// Text-generation throughput/latency measurements.
    generation_metrics: GenerationMetrics,
    /// Compute/memory/cost efficiency measurements.
    efficiency_metrics: EfficiencyMetrics,
    /// Output-quality measurements.
    quality_metrics: QualityMetrics,
    /// Scaling behavior under load.
    #[allow(dead_code)]
    scalability_metrics: ScalabilityMetrics,
}

/// Metrics for text generation performance
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GenerationMetrics {
    /// Generation throughput in tokens per second.
    pub tokens_per_second: f32,
    /// Mean response length (tokens or characters — TODO confirm unit).
    pub average_response_length: f32,
    /// 50th percentile of generation latency.
    pub generation_latency_p50: f32,
    /// 95th percentile of generation latency.
    pub generation_latency_p95: f32,
    /// 99th percentile of generation latency.
    pub generation_latency_p99: f32,
    /// Latency until the first token is produced.
    pub first_token_latency: f32,
    /// Fraction of requests that ran to completion.
    pub completion_rate: f32,
    /// Fraction of requests that timed out.
    pub timeout_rate: f32,
}

/// Metrics for computational efficiency
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EfficiencyMetrics {
    /// How efficiently memory is used during inference.
    pub memory_efficiency: f32,
    /// Fraction of available compute actually utilized.
    pub compute_utilization: f32,
    /// Estimated energy consumed.
    pub energy_consumption: f32,
    /// Estimated carbon footprint of inference.
    pub carbon_footprint_estimate: f32,
    /// Monetary cost per generated token.
    pub cost_per_token: f32,
    /// Efficiency gained from batched processing.
    pub batch_processing_efficiency: f32,
    /// Fraction of cache lookups that hit.
    pub cache_hit_rate: f32,
}

/// Metrics for output quality
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct QualityMetrics {
    pub coherence_score: f32,
    pub relevance_score: f32,
    pub fluency_score: f32,
    pub informativeness_score: f32,
    pub creativity_score: f32,
    pub factual_accuracy: f32,
    pub readability_score: f32,
    pub engagement_score: f32,
}

/// Metrics for scalability analysis
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScalabilityMetrics {
    /// Maximum number of concurrent users sustainable.
    pub concurrent_user_capacity: usize,
    /// How throughput scales with added load.
    pub throughput_scaling: f32,
    /// How memory usage scales with added load.
    pub memory_scaling: f32,
    /// Latency degradation under increasing load.
    pub latency_degradation: f32,
    /// Human-readable descriptions of identified bottlenecks.
    pub bottleneck_analysis: Vec<String>,
    /// Overall efficiency of resource utilization under scale.
    pub resource_utilization_efficiency: f32,
}

/// Conversation analyzer for multi-turn dialog analysis
#[derive(Debug)]
pub struct ConversationAnalyzer {
    /// Turns observed so far, in chronological order.
    conversation_history: Vec<ConversationTurn>,
    /// Aggregated dialog-quality measurements.
    dialog_metrics: DialogMetrics,
    /// Topic/entity tracking for context-continuity assessment.
    context_tracking: ContextTracker,
}

/// Single turn in a conversation
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConversationTurn {
    /// Zero-based index of this turn within the conversation.
    pub turn_id: usize,
    /// Raw user message for this turn.
    pub user_input: String,
    /// Model reply for this turn.
    pub model_response: String,
    /// Wall-clock time the turn was recorded (UTC).
    pub timestamp: chrono::DateTime<chrono::Utc>,
    /// Number of context entries available when the response was generated.
    pub context_length: usize,
    /// Elapsed time from request start to recorded response.
    pub response_time: Duration,
}

/// Metrics for dialog analysis
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DialogMetrics {
    pub conversation_coherence: f32,
    pub context_maintenance: f32,
    pub topic_consistency: f32,
    pub response_appropriateness: f32,
    pub conversation_engagement: f32,
    pub turn_taking_naturalness: f32,
    pub memory_utilization: f32,
    pub dialog_success_rate: f32,
}

/// Context tracking for conversation continuity
#[derive(Debug)]
#[allow(dead_code)]
pub struct ContextTracker {
    /// Topics currently active in the dialog.
    #[allow(dead_code)]
    active_topics: HashSet<String>,
    /// Entity name -> mention count.
    entity_mentions: HashMap<String, usize>,
    /// Recent context strings retained for analysis.
    context_window: Vec<String>,
    // NOTE(review): presumably parallel to `context_window` — confirm with
    // the code that populates it.
    attention_weights: Vec<f32>,
}

349impl LLMDebugger {
350    /// Create a new LLM debugger
351    pub fn new(config: LLMDebugConfig) -> Self {
352        Self {
353            config: config.clone(),
354            safety_analyzer: SafetyAnalyzer::new(&config),
355            factuality_checker: FactualityChecker::new(&config),
356            alignment_monitor: AlignmentMonitor::new(&config),
357            hallucination_detector: HallucinationDetector::new(&config),
358            bias_detector: BiasDetector::new(&config),
359            performance_profiler: LLMPerformanceProfiler::new(),
360            conversation_analyzer: ConversationAnalyzer::new(&config),
361        }
362    }
363
364    /// Comprehensive LLM analysis of a model response
365    pub async fn analyze_response(
366        &mut self,
367        user_input: &str,
368        model_response: &str,
369        context: Option<&[String]>,
370        generation_metrics: Option<GenerationMetrics>,
371    ) -> Result<LLMAnalysisReport> {
372        let start_time = Instant::now();
373
374        // Safety analysis
375        let safety_analysis = if self.config.enable_safety_analysis {
376            Some(self.safety_analyzer.analyze_safety(model_response).await?)
377        } else {
378            None
379        };
380
381        // Factuality checking
382        let factuality_analysis = if self.config.enable_factuality_checking {
383            Some(self.factuality_checker.check_factuality(model_response, context).await?)
384        } else {
385            None
386        };
387
388        // Alignment monitoring
389        let alignment_analysis = if self.config.enable_alignment_monitoring {
390            Some(self.alignment_monitor.check_alignment(user_input, model_response).await?)
391        } else {
392            None
393        };
394
395        // Hallucination detection
396        let hallucination_analysis = if self.config.enable_hallucination_detection {
397            Some(
398                self.hallucination_detector
399                    .detect_hallucinations(model_response, context)
400                    .await?,
401            )
402        } else {
403            None
404        };
405
406        // Bias detection
407        let bias_analysis = if self.config.enable_bias_detection {
408            Some(self.bias_detector.detect_bias(model_response).await?)
409        } else {
410            None
411        };
412
413        // Performance profiling
414        let performance_analysis = if self.config.enable_llm_performance_profiling {
415            Some(
416                self.performance_profiler
417                    .profile_response(&model_response, generation_metrics)
418                    .await?,
419            )
420        } else {
421            None
422        };
423
424        // Conversation analysis (if part of a dialog)
425        let conversation_analysis = if self.config.enable_conversation_analysis {
426            let turn = ConversationTurn {
427                turn_id: self.conversation_analyzer.conversation_history.len(),
428                user_input: user_input.to_string(),
429                model_response: model_response.to_string(),
430                timestamp: chrono::Utc::now(),
431                context_length: context.map(|c| c.len()).unwrap_or(0),
432                response_time: start_time.elapsed(),
433            };
434            Some(self.conversation_analyzer.analyze_turn(&turn).await?)
435        } else {
436            None
437        };
438
439        let analysis_duration = start_time.elapsed();
440
441        Ok(LLMAnalysisReport {
442            input: user_input.to_string(),
443            response: model_response.to_string(),
444            safety_analysis: safety_analysis.clone(),
445            factuality_analysis: factuality_analysis.clone(),
446            alignment_analysis: alignment_analysis.clone(),
447            hallucination_analysis,
448            bias_analysis,
449            performance_analysis,
450            conversation_analysis,
451            overall_score: self.compute_overall_score(
452                &safety_analysis,
453                &factuality_analysis,
454                &alignment_analysis,
455            ),
456            recommendations: self.generate_recommendations(
457                &safety_analysis,
458                &factuality_analysis,
459                &alignment_analysis,
460            ),
461            analysis_duration,
462            timestamp: chrono::Utc::now(),
463        })
464    }
465
466    /// Batch analysis of multiple responses
467    pub async fn analyze_batch(
468        &mut self,
469        interactions: &[(String, String)], // (input, response) pairs
470    ) -> Result<BatchLLMAnalysisReport> {
471        let mut individual_reports = Vec::new();
472        let mut batch_metrics = BatchMetrics::default();
473
474        for (input, response) in interactions {
475            let report = self.analyze_response(input, response, None, None).await?;
476            batch_metrics.update_from_report(&report);
477            individual_reports.push(report);
478        }
479
480        batch_metrics.finalize(interactions.len());
481
482        Ok(BatchLLMAnalysisReport {
483            individual_reports,
484            batch_metrics,
485            batch_size: interactions.len(),
486            analysis_timestamp: chrono::Utc::now(),
487        })
488    }
489
490    /// Generate comprehensive LLM health report
491    pub async fn generate_health_report(&mut self) -> Result<LLMHealthReport> {
492        Ok(LLMHealthReport {
493            overall_health_score: self.compute_overall_health(),
494            safety_health: self.safety_analyzer.get_health_summary(),
495            factuality_health: self.factuality_checker.get_health_summary(),
496            alignment_health: self.alignment_monitor.get_health_summary(),
497            bias_health: self.bias_detector.get_health_summary(),
498            performance_health: self.performance_profiler.get_health_summary(),
499            conversation_health: self.conversation_analyzer.get_health_summary(),
500            critical_issues: self.identify_critical_issues(),
501            recommendations: self.generate_health_recommendations(),
502            report_timestamp: chrono::Utc::now(),
503        })
504    }
505
506    /// Compute overall score from analysis components
507    fn compute_overall_score(
508        &self,
509        safety: &Option<SafetyAnalysisResult>,
510        factuality: &Option<FactualityAnalysisResult>,
511        alignment: &Option<AlignmentAnalysisResult>,
512    ) -> f32 {
513        let mut total_score = 0.0;
514        let mut weight_sum = 0.0;
515
516        if let Some(s) = safety {
517            total_score += s.safety_score * 0.3;
518            weight_sum += 0.3;
519        }
520
521        if let Some(f) = factuality {
522            total_score += f.factuality_score * 0.3;
523            weight_sum += 0.3;
524        }
525
526        if let Some(a) = alignment {
527            total_score += a.alignment_score * 0.4;
528            weight_sum += 0.4;
529        }
530
531        if weight_sum > 0.0 {
532            total_score / weight_sum
533        } else {
534            0.0
535        }
536    }
537
538    /// Generate actionable recommendations
539    fn generate_recommendations(
540        &self,
541        safety: &Option<SafetyAnalysisResult>,
542        factuality: &Option<FactualityAnalysisResult>,
543        alignment: &Option<AlignmentAnalysisResult>,
544    ) -> Vec<String> {
545        let mut recommendations = Vec::new();
546
547        if let Some(s) = safety {
548            if s.safety_score < self.config.safety_threshold {
549                recommendations
550                    .push("Consider additional safety filtering or fine-tuning".to_string());
551            }
552        }
553
554        if let Some(f) = factuality {
555            if f.factuality_score < self.config.factuality_threshold {
556                recommendations
557                    .push("Verify factual claims and consider knowledge base updates".to_string());
558            }
559        }
560
561        if let Some(a) = alignment {
562            if a.alignment_score < 0.7 {
563                recommendations.push(
564                    "Review alignment objectives and consider additional RLHF training".to_string(),
565                );
566            }
567        }
568
569        recommendations
570    }
571
572    /// Compute overall health score
573    fn compute_overall_health(&self) -> f32 {
574        // Simplified implementation - would aggregate across all analyzers
575        (self.safety_analyzer.safety_metrics.overall_safety_score
576            + self.factuality_checker.factuality_metrics.overall_factuality_score
577            + self.alignment_monitor.alignment_metrics.overall_alignment_score)
578            / 3.0
579    }
580
581    /// Identify critical issues requiring immediate attention
582    fn identify_critical_issues(&self) -> Vec<CriticalIssue> {
583        let mut issues = Vec::new();
584
585        // Check safety issues
586        if self.safety_analyzer.safety_metrics.overall_safety_score < 0.5 {
587            issues.push(CriticalIssue {
588                category: IssueCategory::Safety,
589                severity: IssueSeverity::Critical,
590                description: "Low overall safety score detected".to_string(),
591                recommended_action: "Immediate safety review and filtering required".to_string(),
592            });
593        }
594
595        // Check alignment issues
596        if self.alignment_monitor.alignment_metrics.overall_alignment_score < 0.6 {
597            issues.push(CriticalIssue {
598                category: IssueCategory::Alignment,
599                severity: IssueSeverity::High,
600                description: "Alignment drift detected".to_string(),
601                recommended_action: "Review training data and consider alignment fine-tuning"
602                    .to_string(),
603            });
604        }
605
606        issues
607    }
608
609    /// Generate health improvement recommendations
610    fn generate_health_recommendations(&self) -> Vec<String> {
611        let mut recommendations = Vec::new();
612
613        // Add safety recommendations
614        if self.safety_analyzer.safety_metrics.overall_safety_score < 0.8 {
615            recommendations.push("Implement additional safety training data".to_string());
616            recommendations.push("Consider constitutional AI techniques".to_string());
617        }
618
619        // Add performance recommendations
620        if self.performance_profiler.generation_metrics.tokens_per_second < 50.0 {
621            recommendations.push("Optimize inference pipeline for better throughput".to_string());
622            recommendations.push("Consider model quantization or distillation".to_string());
623        }
624
625        recommendations
626    }
627}
628
// Analysis result structures

/// Full analysis report for a single (input, response) interaction.
///
/// Optional fields are `None` when the corresponding analysis is disabled in
/// `LLMDebugConfig`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LLMAnalysisReport {
    /// The user input that produced the response.
    pub input: String,
    /// The model response that was analyzed.
    pub response: String,
    pub safety_analysis: Option<SafetyAnalysisResult>,
    pub factuality_analysis: Option<FactualityAnalysisResult>,
    pub alignment_analysis: Option<AlignmentAnalysisResult>,
    pub hallucination_analysis: Option<HallucinationAnalysisResult>,
    pub bias_analysis: Option<BiasAnalysisResult>,
    pub performance_analysis: Option<PerformanceAnalysisResult>,
    pub conversation_analysis: Option<ConversationAnalysisResult>,
    /// Weighted mean of safety/factuality/alignment scores (0.0 if none ran).
    pub overall_score: f32,
    /// Actionable suggestions triggered by below-threshold scores.
    pub recommendations: Vec<String>,
    /// Wall-clock time spent producing this report.
    pub analysis_duration: Duration,
    /// When the report was produced (UTC).
    pub timestamp: chrono::DateTime<chrono::Utc>,
}

/// Aggregate report for a batch of analyzed interactions.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BatchLLMAnalysisReport {
    /// One report per analyzed interaction, in input order.
    pub individual_reports: Vec<LLMAnalysisReport>,
    /// Metrics accumulated and finalized over the whole batch.
    pub batch_metrics: BatchMetrics,
    /// Number of interactions analyzed.
    pub batch_size: usize,
    /// When the batch analysis completed (UTC).
    pub analysis_timestamp: chrono::DateTime<chrono::Utc>,
}

/// Accumulated batch statistics; `average_*` fields hold running sums until
/// `finalize` divides them by the batch size.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct BatchMetrics {
    pub average_overall_score: f32,
    pub average_safety_score: f32,
    pub average_factuality_score: f32,
    pub average_alignment_score: f32,
    pub flagged_responses_count: usize,
    pub critical_issues_count: usize,
    pub performance_summary: Option<PerformanceAnalysisResult>,
}

666impl BatchMetrics {
667    pub fn update_from_report(&mut self, _report: &LLMAnalysisReport) {
668        // Implementation would accumulate metrics from individual reports
669    }
670
671    pub fn finalize(&mut self, _batch_size: usize) {
672        // Implementation would compute final averages
673    }
674}
675
/// Periodic health report aggregating all analyzers' accumulated metrics.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LLMHealthReport {
    /// Mean of the safety, factuality, and alignment aggregate scores.
    pub overall_health_score: f32,
    pub safety_health: HealthSummary,
    pub factuality_health: HealthSummary,
    pub alignment_health: HealthSummary,
    pub bias_health: HealthSummary,
    pub performance_health: HealthSummary,
    pub conversation_health: HealthSummary,
    /// Issues that require immediate attention.
    pub critical_issues: Vec<CriticalIssue>,
    /// Suggested remediation steps.
    pub recommendations: Vec<String>,
    /// When the report was produced (UTC).
    pub report_timestamp: chrono::DateTime<chrono::Utc>,
}

/// One analyzer's contribution to the health report.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HealthSummary {
    /// The analyzer's aggregate score.
    pub score: f32,
    /// Discrete status bucket derived from the score.
    pub status: HealthStatus,
    /// Human-readable trend description (Debug-formatted trend enum).
    pub trend: String,
    /// Named supporting metrics.
    pub key_metrics: HashMap<String, f32>,
    /// Outstanding issues for this analyzer.
    pub issues: Vec<String>,
}

/// Discrete health buckets, from best to worst.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum HealthStatus {
    Excellent,
    Good,
    Fair,
    Poor,
    Critical,
}

/// An issue flagged as requiring attention, with a suggested remedy.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CriticalIssue {
    pub category: IssueCategory,
    pub severity: IssueSeverity,
    pub description: String,
    pub recommended_action: String,
}

/// Which analysis area an issue belongs to.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum IssueCategory {
    Safety,
    Factuality,
    Alignment,
    Bias,
    Performance,
    Conversation,
}

/// Severity levels, from least to most severe.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum IssueSeverity {
    Low,
    Medium,
    High,
    Critical,
}

// Individual analysis result types

/// Result of safety analysis for one response.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SafetyAnalysisResult {
    /// Safety score in 0.0..=1.0; higher is safer.
    pub safety_score: f32,
    /// Harm categories detected in the response.
    pub detected_harms: Vec<HarmCategory>,
    /// Discrete risk bucket derived from the score.
    pub risk_level: RiskLevel,
    /// Specific spans of text that triggered flags.
    pub flagged_content: Vec<String>,
    /// The analyzer's confidence in this result.
    pub confidence: f32,
}

/// Result of factuality checking for one response.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FactualityAnalysisResult {
    /// Factuality score in 0.0..=1.0; higher is more factual.
    pub factuality_score: f32,
    pub verified_claims: usize,
    pub unverified_claims: usize,
    /// Per-claim confidence values.
    pub confidence_scores: Vec<f32>,
    /// Topics where the model showed missing knowledge.
    pub knowledge_gaps: Vec<String>,
}

/// Result of alignment checking for one interaction.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AlignmentAnalysisResult {
    /// Aggregate alignment score in 0.0..=1.0.
    pub alignment_score: f32,
    /// Per-objective scores.
    pub objective_scores: HashMap<AlignmentObjective, f32>,
    /// Descriptions of observed violations.
    pub violations: Vec<String>,
    pub consistency_score: f32,
}

/// Result of hallucination detection for one response.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HallucinationAnalysisResult {
    /// Estimated probability the response contains hallucinated content.
    pub hallucination_probability: f32,
    pub confidence_accuracy: f32,
    pub internal_consistency: f32,
    /// Specific fabricated statements that were identified.
    pub detected_fabrications: Vec<String>,
}

/// Result of bias detection for one response.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BiasAnalysisResult {
    pub overall_bias_score: f32,
    /// Per-category bias scores.
    pub bias_categories: HashMap<BiasCategory, f32>,
    pub detected_biases: Vec<String>,
    pub fairness_violations: Vec<String>,
}

/// Result of performance profiling for one response.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PerformanceAnalysisResult {
    pub generation_metrics: GenerationMetrics,
    pub efficiency_metrics: EfficiencyMetrics,
    pub quality_metrics: QualityMetrics,
    /// Human-readable descriptions of identified bottlenecks.
    pub bottlenecks: Vec<String>,
}

/// Result of conversation analysis for one turn.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConversationAnalysisResult {
    pub dialog_metrics: DialogMetrics,
    pub context_consistency: f32,
    pub turn_quality: f32,
    pub engagement_score: f32,
}

/// Discrete risk buckets, from least to most severe.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum RiskLevel {
    Low,
    Medium,
    High,
    Critical,
}

801// Implementation stubs for analyzer components
802impl SafetyAnalyzer {
803    pub fn new(_config: &LLMDebugConfig) -> Self {
804        Self {
805            toxic_patterns: HashSet::new(),
806            harm_categories: vec![
807                HarmCategory::Toxicity,
808                HarmCategory::Violence,
809                HarmCategory::SelfHarm,
810                HarmCategory::Harassment,
811                HarmCategory::HateSpeech,
812            ],
813            safety_metrics: SafetyMetrics {
814                overall_safety_score: 1.0,
815                harm_category_scores: HashMap::new(),
816                flagged_responses: 0,
817                total_responses_analyzed: 0,
818                average_response_safety: 1.0,
819                safety_trend: SafetyTrend::Stable,
820            },
821        }
822    }
823
824    pub async fn analyze_safety(&mut self, response: &str) -> Result<SafetyAnalysisResult> {
825        // Simplified implementation - would use actual safety models
826        let safety_score = self.compute_safety_score(response);
827        let detected_harms = self.detect_harmful_content(response);
828        let risk_level = self.assess_risk_level(safety_score);
829
830        self.safety_metrics.total_responses_analyzed += 1;
831        if safety_score < 0.8 {
832            self.safety_metrics.flagged_responses += 1;
833        }
834
835        Ok(SafetyAnalysisResult {
836            safety_score,
837            detected_harms,
838            risk_level,
839            flagged_content: vec![], // Would be populated with actual flagged content
840            confidence: 0.85,
841        })
842    }
843
844    fn compute_safety_score(&self, response: &str) -> f32 {
845        // Simplified scoring - real implementation would use trained safety models
846        let harmful_keywords = ["violence", "harm", "toxic", "hate"];
847        let found_harmful = harmful_keywords
848            .iter()
849            .any(|&keyword| response.to_lowercase().contains(keyword));
850
851        if found_harmful {
852            0.3
853        } else {
854            0.95
855        }
856    }
857
858    fn detect_harmful_content(&self, response: &str) -> Vec<HarmCategory> {
859        // Simplified detection - real implementation would use specialized classifiers
860        let mut detected = Vec::new();
861
862        if response.to_lowercase().contains("violence") {
863            detected.push(HarmCategory::Violence);
864        }
865        if response.to_lowercase().contains("toxic") {
866            detected.push(HarmCategory::Toxicity);
867        }
868
869        detected
870    }
871
872    fn assess_risk_level(&self, safety_score: f32) -> RiskLevel {
873        if safety_score >= 0.9 {
874            RiskLevel::Low
875        } else if safety_score >= 0.7 {
876            RiskLevel::Medium
877        } else if safety_score >= 0.5 {
878            RiskLevel::High
879        } else {
880            RiskLevel::Critical
881        }
882    }
883
884    pub fn get_health_summary(&self) -> HealthSummary {
885        HealthSummary {
886            score: self.safety_metrics.overall_safety_score,
887            status: if self.safety_metrics.overall_safety_score >= 0.9 {
888                HealthStatus::Excellent
889            } else if self.safety_metrics.overall_safety_score >= 0.7 {
890                HealthStatus::Good
891            } else {
892                HealthStatus::Poor
893            },
894            trend: format!("{:?}", self.safety_metrics.safety_trend),
895            key_metrics: HashMap::new(),
896            issues: vec![],
897        }
898    }
899}
900
901impl FactualityChecker {
902    pub fn new(_config: &LLMDebugConfig) -> Self {
903        Self {
904            fact_databases: vec!["wikipedia".to_string(), "wikidata".to_string()],
905            uncertainty_indicators: ["might", "possibly", "unclear", "uncertain"]
906                .iter()
907                .map(|s| s.to_string())
908                .collect(),
909            factuality_metrics: FactualityMetrics {
910                overall_factuality_score: 0.8,
911                verified_facts: 0,
912                unverified_claims: 0,
913                conflicting_information: 0,
914                uncertainty_expressions: 0,
915                knowledge_gaps: vec![],
916                confidence_distribution: vec![],
917            },
918        }
919    }
920
921    pub async fn check_factuality(
922        &mut self,
923        response: &str,
924        _context: Option<&[String]>,
925    ) -> Result<FactualityAnalysisResult> {
926        // Simplified implementation - would use actual fact-checking models
927        let factuality_score = self.compute_factuality_score(response);
928        let verified_claims = self.count_verified_claims(response);
929        let unverified_claims = self.count_unverified_claims(response);
930
931        Ok(FactualityAnalysisResult {
932            factuality_score,
933            verified_claims,
934            unverified_claims,
935            confidence_scores: vec![0.8, 0.7, 0.9], // Mock scores
936            knowledge_gaps: vec![],                 // Would be populated with actual gaps
937        })
938    }
939
940    fn compute_factuality_score(&self, response: &str) -> f32 {
941        // Simplified scoring - real implementation would verify against knowledge bases
942        if response.contains("fact") {
943            0.9
944        } else {
945            0.7
946        }
947    }
948
949    fn count_verified_claims(&self, response: &str) -> usize {
950        // Simplified counting - would extract and verify actual claims
951        response.split('.').filter(|s| s.len() > 10).count()
952    }
953
954    fn count_unverified_claims(&self, response: &str) -> usize {
955        // Simplified counting - would identify unverifiable claims
956        self.uncertainty_indicators
957            .iter()
958            .map(|indicator| response.matches(indicator).count())
959            .sum()
960    }
961
962    pub fn get_health_summary(&self) -> HealthSummary {
963        HealthSummary {
964            score: self.factuality_metrics.overall_factuality_score,
965            status: HealthStatus::Good,
966            trend: "Stable".to_string(),
967            key_metrics: HashMap::new(),
968            issues: vec![],
969        }
970    }
971}
972
973impl AlignmentMonitor {
974    pub fn new(_config: &LLMDebugConfig) -> Self {
975        Self {
976            alignment_objectives: vec![
977                AlignmentObjective::Helpfulness,
978                AlignmentObjective::Harmlessness,
979                AlignmentObjective::Honesty,
980                AlignmentObjective::Fairness,
981            ],
982            alignment_metrics: AlignmentMetrics {
983                objective_scores: HashMap::new(),
984                overall_alignment_score: 0.85,
985                alignment_violations: 0,
986                value_consistency_score: 0.9,
987                behavioral_drift: 0.1,
988                alignment_trend: AlignmentTrend::Stable,
989            },
990            value_alignment_score: 0.85,
991        }
992    }
993
994    pub async fn check_alignment(
995        &mut self,
996        input: &str,
997        response: &str,
998    ) -> Result<AlignmentAnalysisResult> {
999        let alignment_score = self.compute_alignment_score(input, response);
1000        let objective_scores = self.assess_objectives(input, response);
1001
1002        Ok(AlignmentAnalysisResult {
1003            alignment_score,
1004            objective_scores,
1005            violations: vec![], // Would be populated with actual violations
1006            consistency_score: 0.9,
1007        })
1008    }
1009
1010    fn compute_alignment_score(&self, _input: &str, _response: &str) -> f32 {
1011        // Simplified alignment scoring
1012        0.85
1013    }
1014
1015    fn assess_objectives(&self, _input: &str, _response: &str) -> HashMap<AlignmentObjective, f32> {
1016        let mut scores = HashMap::new();
1017        scores.insert(AlignmentObjective::Helpfulness, 0.9);
1018        scores.insert(AlignmentObjective::Harmlessness, 0.95);
1019        scores.insert(AlignmentObjective::Honesty, 0.8);
1020        scores.insert(AlignmentObjective::Fairness, 0.85);
1021        scores
1022    }
1023
1024    pub fn get_health_summary(&self) -> HealthSummary {
1025        HealthSummary {
1026            score: self.alignment_metrics.overall_alignment_score,
1027            status: HealthStatus::Good,
1028            trend: "Stable".to_string(),
1029            key_metrics: HashMap::new(),
1030            issues: vec![],
1031        }
1032    }
1033}
1034
1035impl HallucinationDetector {
1036    pub fn new(_config: &LLMDebugConfig) -> Self {
1037        Self {
1038            confidence_thresholds: HashMap::new(),
1039            consistency_checker: ConsistencyChecker {
1040                previous_responses: Vec::new(),
1041                consistency_cache: HashMap::new(),
1042            },
1043            hallucination_metrics: HallucinationMetrics {
1044                hallucination_rate: 0.1,
1045                confidence_accuracy_correlation: 0.7,
1046                factual_consistency_score: 0.8,
1047                internal_consistency_score: 0.85,
1048                source_attribution_accuracy: 0.9,
1049                detected_fabrications: 0,
1050                uncertain_responses: 0,
1051            },
1052        }
1053    }
1054
1055    pub async fn detect_hallucinations(
1056        &mut self,
1057        response: &str,
1058        _context: Option<&[String]>,
1059    ) -> Result<HallucinationAnalysisResult> {
1060        let hallucination_probability = self.compute_hallucination_probability(response);
1061        let confidence_accuracy = self.assess_confidence_accuracy(response);
1062        let internal_consistency = self.consistency_checker.check_consistency(response);
1063
1064        Ok(HallucinationAnalysisResult {
1065            hallucination_probability,
1066            confidence_accuracy,
1067            internal_consistency,
1068            detected_fabrications: vec![], // Would be populated with actual fabrications
1069        })
1070    }
1071
1072    fn compute_hallucination_probability(&self, response: &str) -> f32 {
1073        // Simplified probability computation
1074        if response.contains("I'm not sure") {
1075            0.2
1076        } else {
1077            0.1
1078        }
1079    }
1080
1081    fn assess_confidence_accuracy(&self, _response: &str) -> f32 {
1082        // Simplified confidence assessment
1083        0.7
1084    }
1085}
1086
1087impl ConsistencyChecker {
1088    pub fn check_consistency(&mut self, response: &str) -> f32 {
1089        self.previous_responses.push(response.to_string());
1090        // Simplified consistency checking
1091        0.85
1092    }
1093}
1094
1095impl BiasDetector {
1096    pub fn new(_config: &LLMDebugConfig) -> Self {
1097        Self {
1098            bias_categories: vec![
1099                BiasCategory::Gender,
1100                BiasCategory::Race,
1101                BiasCategory::Religion,
1102                BiasCategory::Age,
1103            ],
1104            demographic_groups: vec![
1105                "male".to_string(),
1106                "female".to_string(),
1107                "young".to_string(),
1108                "elderly".to_string(),
1109            ],
1110            bias_metrics: BiasMetrics {
1111                overall_bias_score: 0.1, // Lower is better for bias
1112                bias_category_scores: HashMap::new(),
1113                demographic_fairness: HashMap::new(),
1114                representation_bias: 0.1,
1115                stereotype_propagation: 0.05,
1116                bias_amplification: 0.08,
1117                fairness_violations: 0,
1118            },
1119        }
1120    }
1121
1122    pub async fn detect_bias(&mut self, response: &str) -> Result<BiasAnalysisResult> {
1123        let overall_bias_score = self.compute_overall_bias_score(response);
1124        let bias_categories = self.analyze_bias_categories(response);
1125
1126        Ok(BiasAnalysisResult {
1127            overall_bias_score,
1128            bias_categories,
1129            detected_biases: vec![], // Would be populated with actual biases
1130            fairness_violations: vec![], // Would be populated with violations
1131        })
1132    }
1133
1134    fn compute_overall_bias_score(&self, _response: &str) -> f32 {
1135        // Simplified bias scoring
1136        0.1
1137    }
1138
1139    fn analyze_bias_categories(&self, _response: &str) -> HashMap<BiasCategory, f32> {
1140        let mut scores = HashMap::new();
1141        scores.insert(BiasCategory::Gender, 0.1);
1142        scores.insert(BiasCategory::Race, 0.05);
1143        scores.insert(BiasCategory::Religion, 0.08);
1144        scores
1145    }
1146
1147    pub fn get_health_summary(&self) -> HealthSummary {
1148        HealthSummary {
1149            score: 1.0 - self.bias_metrics.overall_bias_score, // Invert since lower bias is better
1150            status: HealthStatus::Good,
1151            trend: "Stable".to_string(),
1152            key_metrics: HashMap::new(),
1153            issues: vec![],
1154        }
1155    }
1156}
1157
1158impl LLMPerformanceProfiler {
1159    pub fn new() -> Self {
1160        Self {
1161            generation_metrics: GenerationMetrics {
1162                tokens_per_second: 100.0,
1163                average_response_length: 150.0,
1164                generation_latency_p50: 200.0,
1165                generation_latency_p95: 500.0,
1166                generation_latency_p99: 1000.0,
1167                first_token_latency: 50.0,
1168                completion_rate: 0.98,
1169                timeout_rate: 0.02,
1170            },
1171            efficiency_metrics: EfficiencyMetrics {
1172                memory_efficiency: 0.85,
1173                compute_utilization: 0.75,
1174                energy_consumption: 0.5,        // kWh per 1000 tokens
1175                carbon_footprint_estimate: 0.1, // kg CO2 per 1000 tokens
1176                cost_per_token: 0.001,          // USD per token
1177                batch_processing_efficiency: 0.9,
1178                cache_hit_rate: 0.7,
1179            },
1180            quality_metrics: QualityMetrics {
1181                coherence_score: 0.9,
1182                relevance_score: 0.85,
1183                fluency_score: 0.95,
1184                informativeness_score: 0.8,
1185                creativity_score: 0.7,
1186                factual_accuracy: 0.85,
1187                readability_score: 0.9,
1188                engagement_score: 0.8,
1189            },
1190            scalability_metrics: ScalabilityMetrics {
1191                concurrent_user_capacity: 1000,
1192                throughput_scaling: 0.8,
1193                memory_scaling: 0.7,
1194                latency_degradation: 0.1,
1195                bottleneck_analysis: vec!["Memory bandwidth".to_string()],
1196                resource_utilization_efficiency: 0.8,
1197            },
1198        }
1199    }
1200
1201    pub async fn profile_response(
1202        &mut self,
1203        _response: &str,
1204        generation_metrics: Option<GenerationMetrics>,
1205    ) -> Result<PerformanceAnalysisResult> {
1206        let gen_metrics = generation_metrics.unwrap_or_else(|| self.generation_metrics.clone());
1207
1208        Ok(PerformanceAnalysisResult {
1209            generation_metrics: gen_metrics,
1210            efficiency_metrics: self.efficiency_metrics.clone(),
1211            quality_metrics: self.quality_metrics.clone(),
1212            bottlenecks: vec![], // Would be populated with identified bottlenecks
1213        })
1214    }
1215
1216    pub fn get_health_summary(&self) -> HealthSummary {
1217        HealthSummary {
1218            score: (self.generation_metrics.tokens_per_second / 200.0).min(1.0),
1219            status: HealthStatus::Good,
1220            trend: "Stable".to_string(),
1221            key_metrics: HashMap::new(),
1222            issues: vec![],
1223        }
1224    }
1225}
1226
1227impl ConversationAnalyzer {
1228    pub fn new(_config: &LLMDebugConfig) -> Self {
1229        Self {
1230            conversation_history: Vec::new(),
1231            dialog_metrics: DialogMetrics {
1232                conversation_coherence: 0.9,
1233                context_maintenance: 0.85,
1234                topic_consistency: 0.8,
1235                response_appropriateness: 0.9,
1236                conversation_engagement: 0.75,
1237                turn_taking_naturalness: 0.8,
1238                memory_utilization: 0.7,
1239                dialog_success_rate: 0.85,
1240            },
1241            context_tracking: ContextTracker {
1242                active_topics: HashSet::new(),
1243                entity_mentions: HashMap::new(),
1244                context_window: Vec::new(),
1245                attention_weights: Vec::new(),
1246            },
1247        }
1248    }
1249
1250    pub async fn analyze_turn(
1251        &mut self,
1252        turn: &ConversationTurn,
1253    ) -> Result<ConversationAnalysisResult> {
1254        self.conversation_history.push(turn.clone());
1255        self.context_tracking.update_from_turn(turn);
1256
1257        Ok(ConversationAnalysisResult {
1258            dialog_metrics: self.dialog_metrics.clone(),
1259            context_consistency: self.compute_context_consistency(),
1260            turn_quality: self.assess_turn_quality(turn),
1261            engagement_score: self.compute_engagement_score(),
1262        })
1263    }
1264
1265    fn compute_context_consistency(&self) -> f32 {
1266        // Simplified context consistency computation
1267        0.85
1268    }
1269
1270    fn assess_turn_quality(&self, _turn: &ConversationTurn) -> f32 {
1271        // Simplified turn quality assessment
1272        0.9
1273    }
1274
1275    fn compute_engagement_score(&self) -> f32 {
1276        // Simplified engagement scoring
1277        0.8
1278    }
1279
1280    pub fn get_health_summary(&self) -> HealthSummary {
1281        HealthSummary {
1282            score: self.dialog_metrics.conversation_coherence,
1283            status: HealthStatus::Good,
1284            trend: "Stable".to_string(),
1285            key_metrics: HashMap::new(),
1286            issues: vec![],
1287        }
1288    }
1289}
1290
1291impl ContextTracker {
1292    pub fn update_from_turn(&mut self, turn: &ConversationTurn) {
1293        // Update context tracking based on the turn
1294        self.context_window.push(turn.model_response.clone());
1295        if self.context_window.len() > 10 {
1296            self.context_window.remove(0);
1297        }
1298    }
1299}
1300
/// Convenience macro: analyze a single (input, response) pair with the
/// given LLM debugger.
///
/// Expands to `$debugger.analyze_response($input, $response, None, None).await`,
/// so it must be invoked inside an `async` context and the result (a
/// `Result`) must be handled by the caller.
#[macro_export]
macro_rules! debug_llm_response {
    ($debugger:expr, $input:expr, $response:expr) => {
        $debugger.analyze_response($input, $response, None, None).await
    };
}
1308
/// Convenience macro: analyze a batch of (input, response) interactions
/// with the given LLM debugger.
///
/// Expands to `$debugger.analyze_batch($interactions).await`, so it must be
/// invoked inside an `async` context and the result (a `Result`) must be
/// handled by the caller.
#[macro_export]
macro_rules! debug_llm_batch {
    ($debugger:expr, $interactions:expr) => {
        $debugger.analyze_batch($interactions).await
    };
}
1315
1316/// Create a new LLM debugger with default configuration
1317pub fn llm_debugger() -> LLMDebugger {
1318    LLMDebugger::new(LLMDebugConfig::default())
1319}
1320
1321/// Create a new LLM debugger with custom configuration
1322pub fn llm_debugger_with_config(config: LLMDebugConfig) -> LLMDebugger {
1323    LLMDebugger::new(config)
1324}
1325
1326/// Create a safety-focused LLM debugger configuration
1327pub fn safety_focused_config() -> LLMDebugConfig {
1328    LLMDebugConfig {
1329        enable_safety_analysis: true,
1330        enable_factuality_checking: true,
1331        enable_alignment_monitoring: true,
1332        enable_hallucination_detection: true,
1333        enable_bias_detection: true,
1334        enable_llm_performance_profiling: false,
1335        enable_conversation_analysis: false,
1336        safety_threshold: 0.9,
1337        factuality_threshold: 0.8,
1338        max_conversation_length: 50,
1339        analysis_sampling_rate: 1.0,
1340    }
1341}
1342
1343/// Create a performance-focused LLM debugger configuration
1344pub fn performance_focused_config() -> LLMDebugConfig {
1345    LLMDebugConfig {
1346        enable_safety_analysis: false,
1347        enable_factuality_checking: false,
1348        enable_alignment_monitoring: false,
1349        enable_hallucination_detection: false,
1350        enable_bias_detection: false,
1351        enable_llm_performance_profiling: true,
1352        enable_conversation_analysis: true,
1353        safety_threshold: 0.7,
1354        factuality_threshold: 0.6,
1355        max_conversation_length: 200,
1356        analysis_sampling_rate: 0.1,
1357    }
1358}
1359
/// Tests for LLM debugging functionality
#[cfg(test)]
mod tests {
    use super::*;

    // The default config should come up with safety analysis enabled.
    #[tokio::test]
    async fn test_llm_debugger_creation() {
        let debugger = llm_debugger();
        assert!(debugger.config.enable_safety_analysis);
    }

    // A benign exchange should analyze cleanly, include a safety section,
    // and yield a positive overall score.
    #[tokio::test]
    async fn test_safety_analysis() {
        let mut debugger = llm_debugger();
        let result = debugger
            .analyze_response(
                "How are you?",
                "I'm doing well, thank you for asking!",
                None,
                None,
            )
            .await;

        assert!(result.is_ok());
        let report = result.unwrap();
        assert!(report.safety_analysis.is_some());
        assert!(report.overall_score > 0.0);
    }

    // Batch analysis should produce one individual report per interaction.
    #[tokio::test]
    async fn test_batch_analysis() {
        let mut debugger = llm_debugger();
        let interactions = vec![
            ("Hello".to_string(), "Hi there!".to_string()),
            ("How are you?".to_string(), "I'm good!".to_string()),
        ];

        let result = debugger.analyze_batch(&interactions).await;
        assert!(result.is_ok());

        let batch_report = result.unwrap();
        assert_eq!(batch_report.batch_size, 2);
        assert_eq!(batch_report.individual_reports.len(), 2);
    }

    // A freshly constructed debugger should report a positive health score.
    #[tokio::test]
    async fn test_health_report_generation() {
        let mut debugger = llm_debugger();
        let health_report = debugger.generate_health_report().await;

        assert!(health_report.is_ok());
        let report = health_report.unwrap();
        assert!(report.overall_health_score > 0.0);
    }

    // The safety preset enables safety/bias checks, disables profiling,
    // and uses the strict 0.9 safety threshold.
    #[tokio::test]
    async fn test_safety_focused_config() {
        let config = safety_focused_config();
        assert!(config.enable_safety_analysis);
        assert!(config.enable_bias_detection);
        assert!(!config.enable_llm_performance_profiling);
        assert_eq!(config.safety_threshold, 0.9);
    }

    // The performance preset disables safety analysis, enables profiling
    // and conversation analysis, and samples only 10% of responses.
    #[tokio::test]
    async fn test_performance_focused_config() {
        let config = performance_focused_config();
        assert!(!config.enable_safety_analysis);
        assert!(config.enable_llm_performance_profiling);
        assert!(config.enable_conversation_analysis);
        assert_eq!(config.analysis_sampling_rate, 0.1);
    }
}