use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
/// Top-level container bundling every part of one attention analysis.
///
/// This type carries data only; how each section is computed is not defined
/// in this file.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct AttentionAnalysisResult {
    /// UTC timestamp attached to this result.
    pub timestamp: DateTime<Utc>,
    /// Per-layer results keyed by a string identifier.
    /// NOTE(review): the key format is not established here — confirm with the producer.
    pub attention_weights: HashMap<String, AttentionLayerResult>,
    /// Detected attention patterns, grouped by pattern kind.
    pub attention_patterns: AttentionPatterns,
    /// Head specialization findings.
    pub head_specialization: HeadSpecializationAnalysis,
    /// Attention flow findings.
    pub attention_flow: AttentionFlowAnalysis,
    /// Aggregate statistics and generated insights.
    pub attention_stats: AttentionStatistics,
}
/// Analysis output for a single layer.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct AttentionLayerResult {
    /// Index of the layer this result belongs to.
    pub layer_index: usize,
    /// Per-head results keyed by an integer.
    /// NOTE(review): presumably the key equals `AttentionHeadResult::head_index` — confirm.
    pub heads: HashMap<usize, AttentionHeadResult>,
    /// Pattern summary for the layer.
    pub layer_patterns: LayerAttentionPatterns,
    /// Statistical summary for the layer.
    pub layer_stats: LayerAttentionStats,
}
/// Analysis output for a single attention head.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct AttentionHeadResult {
    /// Index of the head this result belongs to.
    pub head_index: usize,
    /// Attention values stored as a nested vector (rows of `f64`).
    /// NOTE(review): row/column orientation is not established here — confirm.
    pub attention_matrix: Vec<Vec<f64>>,
    /// Per-token attention scores.
    pub token_scores: Vec<TokenAttentionScore>,
    /// Specialization label assigned to this head.
    pub specialization_type: HeadSpecializationType,
    /// Entropy value recorded for this head.
    pub entropy: f64,
    /// Maximum attention value recorded for this head.
    pub max_attention: f64,
    /// Sparsity value recorded for this head.
    pub sparsity: f64,
}
/// Attention scores recorded for one token.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct TokenAttentionScore {
    /// Text of the token.
    pub token: String,
    /// Position of the token.
    pub position: usize,
    /// Amount of attention this token received.
    pub attention_received: f64,
    /// Amount of attention this token gave.
    pub attention_given: f64,
    /// Attention of the token to itself.
    pub self_attention: f64,
    /// String/score pairs.
    /// NOTE(review): presumably (token, attention weight) for the tokens this
    /// one attends to most — confirm ordering and meaning.
    pub most_attended: Vec<(String, f64)>,
}
/// Label describing what an attention head has been classified as.
///
/// The classification criteria are not defined in this file. The type derives
/// `Eq` and `Hash`, and is used as a map key by `SpecializationEvolution`.
#[derive(Clone, Debug, Deserialize, Eq, Hash, PartialEq, Serialize)]
pub enum HeadSpecializationType {
    /// Head labelled as local.
    Local,
    /// Head labelled as global.
    Global,
    /// Head labelled as syntactic.
    Syntactic,
    /// Head labelled as semantic.
    Semantic,
    /// Head labelled as positional.
    Positional,
    /// Head labelled as mixed.
    Mixed,
    /// Head labelled as special-token oriented.
    SpecialToken,
}
/// Collection of detected attention patterns, one list per pattern kind.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct AttentionPatterns {
    /// Detected diagonal patterns.
    pub diagonal_patterns: Vec<DiagonalPattern>,
    /// Detected vertical patterns.
    pub vertical_patterns: Vec<VerticalPattern>,
    /// Detected block patterns.
    pub block_patterns: Vec<BlockPattern>,
    /// Detected repetitive patterns.
    pub repetitive_patterns: Vec<RepetitivePattern>,
}
/// One diagonal attention pattern.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct DiagonalPattern {
    /// Location of the pattern.
    /// NOTE(review): presumably (layer index, head index) — confirm.
    pub layer_head: (usize, usize),
    /// Signed offset of the diagonal.
    /// NOTE(review): sign convention is not established here — confirm.
    pub offset: i32,
    /// Strength of the pattern.
    pub strength: f64,
    /// Coverage of the pattern.
    pub coverage: f64,
}
/// One vertical attention pattern.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct VerticalPattern {
    /// Location of the pattern.
    /// NOTE(review): presumably (layer index, head index) — confirm.
    pub layer_head: (usize, usize),
    /// Position targeted by the pattern.
    pub target_position: usize,
    /// Strength of the pattern.
    pub strength: f64,
    /// Count of attending tokens.
    pub attending_tokens: usize,
}
/// One block attention pattern spanning a range of positions.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct BlockPattern {
    /// Location of the pattern.
    /// NOTE(review): presumably (layer index, head index) — confirm.
    pub layer_head: (usize, usize),
    /// First position of the block.
    pub start_position: usize,
    /// Last position of the block.
    /// NOTE(review): inclusive vs. exclusive is not established here — confirm.
    pub end_position: usize,
    /// Strength of attention internal to the block.
    pub internal_strength: f64,
    /// Attention external to the block.
    pub external_attention: f64,
}
/// One repetitive (periodic) attention pattern.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct RepetitivePattern {
    /// Location of the pattern.
    /// NOTE(review): presumably (layer index, head index) — confirm.
    pub layer_head: (usize, usize),
    /// Period of the repetition.
    pub period: usize,
    /// Strength of the pattern.
    pub strength: f64,
    /// Number of repetitions recorded.
    pub repetitions: usize,
}
/// Pattern-level summary values for one layer.
///
/// Units and value ranges are not defined in this file.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct LayerAttentionPatterns {
    /// Average attention distance.
    pub avg_attention_distance: f64,
    /// Attention concentration.
    pub concentration: f64,
    /// Similarity between heads of the layer.
    pub inter_head_similarity: f64,
    /// Attention diversity.
    pub diversity: f64,
}
/// Statistical summary values for one layer.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct LayerAttentionStats {
    /// Mean attention value.
    pub mean_attention: f64,
    /// Variance of attention values.
    pub attention_variance: f64,
    /// Entropy value for the layer.
    pub entropy: f64,
    /// Count of significant connections.
    /// NOTE(review): the significance threshold is not established here — confirm.
    pub significant_connections: usize,
    /// Sparsity ratio for the layer.
    pub sparsity_ratio: f64,
}
/// Findings about how attention heads specialize.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct HeadSpecializationAnalysis {
    /// Specialization labels per layer, keyed by an integer.
    /// NOTE(review): presumably keyed by layer index, one label per head — confirm.
    pub layer_specialization: HashMap<usize, Vec<HeadSpecializationType>>,
    /// Clusters of heads.
    pub head_clusters: Vec<HeadCluster>,
    /// How specialization changes across layers.
    pub specialization_evolution: SpecializationEvolution,
    /// Redundancy findings and pruning suggestions.
    pub redundancy_analysis: HeadRedundancyAnalysis,
}
/// A group of attention heads clustered together.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct HeadCluster {
    /// Identifier of the cluster.
    pub cluster_id: usize,
    /// Members of the cluster.
    /// NOTE(review): presumably (layer index, head index) pairs — confirm.
    pub heads: Vec<(usize, usize)>,
    /// Centroid pattern of the cluster as a flat vector.
    pub centroid_pattern: Vec<f64>,
    /// Cohesion of the cluster.
    pub cohesion: f64,
    /// Specialization label assigned to the cluster.
    pub specialization: HeadSpecializationType,
}
/// Description of how head specialization changes across layers.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct SpecializationEvolution {
    /// Count of each specialization label, nested under an integer key.
    /// NOTE(review): presumably the outer key is the layer index — confirm.
    pub layer_distribution: HashMap<usize, HashMap<HeadSpecializationType, usize>>,
    /// Recorded transitions between specializations.
    pub transitions: Vec<SpecializationTransition>,
    /// Overall trend.
    pub trend: SpecializationTrend,
}
/// A change of specialization between two layers.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct SpecializationTransition {
    /// Layer the transition starts from.
    pub from_layer: usize,
    /// Layer the transition ends at.
    pub to_layer: usize,
    /// Specialization at the source layer.
    pub from_specialization: HeadSpecializationType,
    /// Specialization at the destination layer.
    pub to_specialization: HeadSpecializationType,
    /// Strength of the transition.
    pub strength: f64,
}
/// Overall direction of specialization across layers.
///
/// Derives `PartialEq`, `Eq`, and `Hash` so values can be compared with `==`
/// and used as map keys, matching `HeadSpecializationType`. How a trend is
/// determined is not defined in this file.
#[derive(Clone, Debug, Deserialize, Eq, Hash, PartialEq, Serialize)]
pub enum SpecializationTrend {
    /// Trend labelled as increasing.
    Increasing,
    /// Trend labelled as decreasing.
    Decreasing,
    /// Trend labelled as stable.
    Stable,
    /// Trend labelled as mixed.
    Mixed,
}
/// Findings about redundant attention heads and what could be pruned.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct HeadRedundancyAnalysis {
    /// Pairs of heads found to be redundant.
    pub redundant_pairs: Vec<RedundantHeadPair>,
    /// Overall redundancy score.
    pub redundancy_score: f64,
    /// Suggested heads to prune.
    pub pruning_recommendations: Vec<PruningRecommendation>,
    /// Heads considered essential.
    /// NOTE(review): presumably (layer index, head index) pairs — confirm.
    pub essential_heads: Vec<(usize, usize)>,
}
/// Two heads found to be redundant with each other.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct RedundantHeadPair {
    /// First head of the pair.
    /// NOTE(review): presumably (layer index, head index) — confirm.
    pub head1: (usize, usize),
    /// Second head of the pair, same tuple layout as `head1`.
    pub head2: (usize, usize),
    /// Similarity between the two heads.
    pub similarity: f64,
    /// Kind of redundancy.
    pub redundancy_type: RedundancyType,
}
/// Kind of redundancy between two heads.
///
/// Derives `PartialEq`, `Eq`, and `Hash` so values can be compared with `==`
/// and used as map keys, matching `HeadSpecializationType`. The criteria
/// separating the variants are not defined in this file.
#[derive(Clone, Debug, Deserialize, Eq, Hash, PartialEq, Serialize)]
pub enum RedundancyType {
    /// Redundancy labelled as identical.
    Identical,
    /// Redundancy labelled as correlated.
    Correlated,
    /// Redundancy labelled as functional.
    Functional,
    /// Redundancy labelled as partial.
    Partial,
}
/// Suggestion to prune one attention head.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct PruningRecommendation {
    /// Head suggested for pruning.
    /// NOTE(review): presumably (layer index, head index) — confirm.
    pub head_to_prune: (usize, usize),
    /// Confidence in the recommendation.
    pub confidence: f64,
    /// Expected impact of carrying out the pruning.
    pub expected_impact: PruningImpact,
    /// Free-text reason for the recommendation.
    pub reason: String,
}
/// Expected consequences of pruning a head.
///
/// Units of the numeric fields are not defined in this file.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct PruningImpact {
    /// Expected performance drop.
    pub performance_drop: f64,
    /// Expected memory savings.
    pub memory_savings: f64,
    /// Expected computational savings.
    pub computational_savings: f64,
    /// Risk level of the pruning.
    pub risk_level: RiskLevel,
}
/// Risk level attached to a pruning impact estimate.
///
/// Derives `PartialEq`, `Eq`, and `Hash` so values can be compared with `==`
/// and used as map keys, matching `HeadSpecializationType`. No ordering trait
/// is derived, so levels cannot be compared with `<` or `>`.
#[derive(Clone, Debug, Deserialize, Eq, Hash, PartialEq, Serialize)]
pub enum RiskLevel {
    /// Risk labelled as low.
    Low,
    /// Risk labelled as medium.
    Medium,
    /// Risk labelled as high.
    High,
    /// Risk labelled as critical.
    Critical,
}
/// Findings about how attention flows through the layers.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct AttentionFlowAnalysis {
    /// Recorded flow paths.
    pub flow_paths: Vec<AttentionFlowPath>,
    /// Detected bottlenecks.
    pub bottlenecks: Vec<AttentionBottleneck>,
    /// Efficiency metrics for the flow as a whole.
    pub efficiency_metrics: FlowEfficiencyMetrics,
    /// Flow statistics keyed by an integer.
    /// NOTE(review): presumably keyed by layer index — confirm.
    pub layer_flow_stats: HashMap<usize, LayerFlowStats>,
}
/// One path along which attention flows.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct AttentionFlowPath {
    /// String identifier of the path.
    pub path_id: String,
    /// Position where the path starts.
    pub start_position: usize,
    /// Position where the path ends.
    pub end_position: usize,
    /// Steps that make up the path.
    pub flow_steps: Vec<LayerFlowStep>,
    /// Total strength of the path.
    pub total_strength: f64,
    /// Length of the path.
    /// NOTE(review): presumably equals `flow_steps.len()`; nothing here enforces it — confirm.
    pub path_length: usize,
}
/// A single step of an attention flow path.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct LayerFlowStep {
    /// Layer index of the step.
    pub layer_index: usize,
    /// Head index of the step.
    pub head_index: usize,
    /// Strength of the step.
    pub strength: f64,
    /// Kind of transformation applied at this step.
    pub transformation: FlowTransformation,
    /// Tokens involved in the step, stored as integers.
    /// NOTE(review): presumably token positions — confirm.
    pub involved_tokens: Vec<usize>,
}
/// Kind of transformation attached to a flow step.
///
/// Derives `PartialEq`, `Eq`, and `Hash` so values can be compared with `==`
/// and used as map keys, matching `HeadSpecializationType`. The criteria
/// separating the variants are not defined in this file.
#[derive(Clone, Debug, Deserialize, Eq, Hash, PartialEq, Serialize)]
pub enum FlowTransformation {
    /// Transformation labelled as direct.
    Direct,
    /// Transformation labelled as diffusion.
    Diffusion,
    /// Transformation labelled as concentration.
    Concentration,
    /// Transformation labelled as routing.
    Routing,
}
/// A detected bottleneck in the attention flow.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct AttentionBottleneck {
    /// Location of the bottleneck.
    /// NOTE(review): presumably (layer index, head index) — confirm.
    pub location: (usize, usize),
    /// Kind of bottleneck.
    pub bottleneck_type: BottleneckType,
    /// Severity of the bottleneck.
    pub severity: f64,
    /// Paths affected by the bottleneck, stored as strings.
    /// NOTE(review): presumably `AttentionFlowPath::path_id` values — confirm.
    pub affected_paths: Vec<String>,
}
/// Kind of bottleneck.
///
/// Derives `PartialEq`, `Eq`, and `Hash` so values can be compared with `==`
/// and used as map keys, matching `HeadSpecializationType`. The criteria
/// separating the variants are not defined in this file.
#[derive(Clone, Debug, Deserialize, Eq, Hash, PartialEq, Serialize)]
pub enum BottleneckType {
    /// Bottleneck labelled as information-related.
    Information,
    /// Bottleneck labelled as attention-related.
    Attention,
    /// Bottleneck labelled as capacity-related.
    Capacity,
    /// Bottleneck labelled as flow-related.
    Flow,
}
/// Efficiency metrics for the attention flow as a whole.
///
/// Units and value ranges are not defined in this file.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct FlowEfficiencyMetrics {
    /// Overall efficiency.
    pub overall_efficiency: f64,
    /// Information preservation.
    pub information_preservation: f64,
    /// Redundancy in the flow.
    pub flow_redundancy: f64,
    /// Impact of bottlenecks.
    pub bottleneck_impact: f64,
}
/// Flow statistics for one layer.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct LayerFlowStats {
    /// Flow coming into the layer.
    pub incoming_flow: f64,
    /// Flow going out of the layer.
    pub outgoing_flow: f64,
    /// Retention ratio.
    pub retention_ratio: f64,
    /// Transformation ratio.
    pub transformation_ratio: f64,
}
/// Aggregate statistics and insights for an attention analysis.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct AttentionStatistics {
    /// Average entropy.
    pub avg_entropy: f64,
    /// Concentration values keyed by a string label.
    /// NOTE(review): the label vocabulary is not established here — confirm.
    pub concentration_distribution: HashMap<String, f64>,
    /// Sparsity broken down by layer and by head.
    pub sparsity_distribution: SparsityDistribution,
    /// Insights generated from the analysis.
    pub insights: Vec<AttentionInsight>,
}
/// Sparsity values broken down at several granularities.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct SparsityDistribution {
    /// Sparsity keyed by an integer.
    /// NOTE(review): presumably keyed by layer index — confirm.
    pub by_layer: HashMap<usize, f64>,
    /// Sparsity keyed by an integer pair.
    /// NOTE(review): presumably (layer index, head index) — confirm.
    /// NOTE(review): a tuple map key cannot be written by formats that require
    /// string map keys (serde_json, for example, rejects non-string keys at
    /// serialization time). Confirm the target format before relying on this
    /// field round-tripping.
    pub by_head: HashMap<(usize, usize), f64>,
    /// Overall sparsity.
    pub overall_sparsity: f64,
    /// Variance of sparsity.
    pub sparsity_variance: f64,
}
/// One insight produced by the analysis.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct AttentionInsight {
    /// Category of the insight.
    pub insight_type: InsightType,
    /// Free-text description of the insight.
    pub description: String,
    /// Confidence in the insight.
    pub confidence: f64,
    /// Supporting evidence, stored as free-text strings.
    pub evidence: Vec<String>,
}
/// Category of an [`AttentionInsight`].
///
/// Derives `PartialEq`, `Eq`, and `Hash` so values can be compared with `==`
/// and used as map keys (for example to group insights by category), matching
/// `HeadSpecializationType`.
#[derive(Clone, Debug, Deserialize, Eq, Hash, PartialEq, Serialize)]
pub enum InsightType {
    /// Insight about head specialization.
    HeadSpecialization,
    /// Insight about a discovered pattern.
    PatternDiscovery,
    /// Insight about attention flow.
    FlowAnalysis,
    /// Insight about detected redundancy.
    RedundancyDetection,
    /// Insight about an optimization opportunity.
    OptimizationOpportunity,
    /// Insight explaining a behavior.
    BehaviorExplanation,
}