use crate::error::Result;
use crate::tensor::Tensor;
/// Summary statistics derived from a model's attention weights.
///
/// Produced by `analyze_attention`; rendered for humans via its
/// `Display` impl.
#[derive(Debug, Clone)]
pub struct CognitiveProfile {
    /// Mean per-row Shannon entropy (bits), indexed as `[layer][head]`.
    pub attention_entropy: Vec<Vec<f64>>,
    /// Average entropy (bits) over every attention row analyzed.
    pub mean_attention_entropy: f64,
    /// Fraction of attention weights above the utilization threshold.
    pub context_utilization: f64,
    /// Largest single attention weight observed across all layers/heads.
    pub peak_attention: f64,
    /// Fraction of attention weights below the sparsity threshold.
    pub attention_sparsity: f64,
    /// Number of layers in the analyzed attention stack.
    pub num_layers: usize,
    /// Number of heads per layer (taken from the first layer).
    pub num_heads: usize,
}
/// Shannon entropy (in bits) of a probability distribution.
///
/// Entries at or below `1e-10` are skipped to avoid evaluating
/// `log2` near zero; an empty slice yields `0.0`.
pub fn shannon_entropy(probs: &[f64]) -> f64 {
    probs
        .iter()
        .filter(|&&p| p > 1e-10)
        .map(|&p| -p * p.log2())
        .sum()
}
/// Builds a [`CognitiveProfile`] from per-layer, per-head attention matrices.
///
/// `attention_weights[layer][head]` is treated as an attention matrix whose
/// rows are probability distributions over key positions (assumes
/// `shape()[0]` is the number of rows / query positions — TODO confirm
/// against the `Tensor` API).
///
/// An empty input produces a zeroed profile. `num_heads` is taken from the
/// first layer; NOTE(review): this assumes every layer has the same head
/// count — verify against the caller.
///
/// # Errors
/// Propagates any error returned by `Tensor::row`.
pub fn analyze_attention(attention_weights: &[Vec<Tensor>]) -> Result<CognitiveProfile> {
    // Weights strictly below this count toward sparsity.
    const ATTENTION_THRESHOLD: f64 = 0.01;
    // Weights strictly above this count as "utilized" context.
    const UTILIZATION_THRESHOLD: f64 = 0.05;

    let num_layers = attention_weights.len();
    if num_layers == 0 {
        return Ok(CognitiveProfile {
            attention_entropy: Vec::new(),
            mean_attention_entropy: 0.0,
            context_utilization: 0.0,
            peak_attention: 0.0,
            attention_sparsity: 0.0,
            num_layers: 0,
            num_heads: 0,
        });
    }
    let num_heads = attention_weights[0].len();

    let mut all_entropies = Vec::with_capacity(num_layers);
    let mut total_entropy = 0.0;
    let mut entropy_count = 0_usize;
    // Attention weights are probabilities (>= 0), so 0.0 is a safe initial peak.
    let mut peak = 0.0_f64;
    let mut sparse_count = 0_usize;
    let mut utilized_count = 0_usize;
    // Single denominator for both sparsity and utilization: every weight seen.
    // (The original kept two counters that were always incremented together
    // and therefore always equal.)
    let mut total_weights = 0_usize;

    for layer_weights in attention_weights {
        let mut layer_entropies = Vec::with_capacity(num_heads);
        for head_weights in layer_weights {
            let seq_len = head_weights.shape()[0];
            let mut head_entropy_sum = 0.0;
            for r in 0..seq_len {
                let row = head_weights.row(r)?;
                let data = row.data();
                // Each row is one query position's distribution over keys.
                let h = shannon_entropy(data);
                total_entropy += h;
                head_entropy_sum += h;
                entropy_count += 1;
                for &w in data {
                    if w > peak {
                        peak = w;
                    }
                    if w < ATTENTION_THRESHOLD {
                        sparse_count += 1;
                    }
                    if w > UTILIZATION_THRESHOLD {
                        utilized_count += 1;
                    }
                    total_weights += 1;
                }
            }
            // Mean per-row entropy for this head; 0.0 for an empty matrix.
            let avg_h = if seq_len > 0 {
                head_entropy_sum / seq_len as f64
            } else {
                0.0
            };
            layer_entropies.push(avg_h);
        }
        all_entropies.push(layer_entropies);
    }

    let mean_entropy = if entropy_count > 0 {
        total_entropy / entropy_count as f64
    } else {
        0.0
    };
    // Both ratios share the same denominator (all weights inspected).
    let (sparsity, utilization) = if total_weights > 0 {
        (
            sparse_count as f64 / total_weights as f64,
            utilized_count as f64 / total_weights as f64,
        )
    } else {
        (0.0, 0.0)
    };

    Ok(CognitiveProfile {
        attention_entropy: all_entropies,
        mean_attention_entropy: mean_entropy,
        context_utilization: utilization,
        peak_attention: peak,
        attention_sparsity: sparsity,
        num_layers,
        num_heads,
    })
}
/// Confidence of a generation step: the largest softmax probability
/// over the given logits.
///
/// # Errors
/// Propagates any error from `Tensor::softmax` or `Tensor::max`.
pub fn generation_confidence(logits: &Tensor) -> Result<f64> {
    logits.softmax()?.max()
}
/// Perplexity of a token sequence under the per-step logits.
///
/// Only the first `min(token_ids.len(), logits_per_step.len())` steps are
/// scored. Each token's probability is clamped to at least `1e-10` before
/// taking its log (missing indices fall back to `1e-10` as well). Empty
/// input on either side yields `f64::INFINITY`.
///
/// # Errors
/// Propagates any error from `Tensor::softmax`.
pub fn perplexity(token_ids: &[usize], logits_per_step: &[Tensor]) -> Result<f64> {
    if token_ids.is_empty() || logits_per_step.is_empty() {
        return Ok(f64::INFINITY);
    }
    let steps = token_ids.len().min(logits_per_step.len());
    let mut log_likelihood = 0.0;
    // `zip` stops at the shorter sequence, i.e. after exactly `steps` items.
    for (&token_id, step_logits) in token_ids.iter().zip(logits_per_step) {
        let probs = step_logits.softmax()?;
        let p = probs.get_flat(token_id).unwrap_or(1e-10).max(1e-10);
        log_likelihood += p.ln();
    }
    Ok((-log_likelihood / steps as f64).exp())
}
impl std::fmt::Display for CognitiveProfile {
    /// Renders a multi-line, human-readable summary of the profile.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        writeln!(f, "=== Cognitive Profile ===")?;
        writeln!(f, "Layers: {} | Heads: {}", self.num_layers, self.num_heads)?;
        writeln!(f, "Mean Attention Entropy: {:.4} bits", self.mean_attention_entropy)?;
        writeln!(f, "Context Utilization: {:.1}%", self.context_utilization * 100.0)?;
        writeln!(f, "Peak Attention Weight: {:.4}", self.peak_attention)?;
        // Final write is the return value; no trailing Ok(()) needed.
        writeln!(f, "Attention Sparsity: {:.1}%", self.attention_sparsity * 100.0)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_entropy_uniform() {
        // A uniform distribution over 4 outcomes carries exactly 2 bits.
        let uniform = [0.25; 4];
        let entropy = shannon_entropy(&uniform);
        assert!((entropy - 2.0).abs() < 1e-10);
    }

    #[test]
    fn test_entropy_peaked() {
        // Nearly all mass on one outcome -> entropy close to zero.
        let peaked = [0.99, 0.003, 0.003, 0.004];
        assert!(shannon_entropy(&peaked) < 0.1);
    }

    #[test]
    fn test_confidence() {
        // One dominant logit should softmax to near-certainty.
        let logits = Tensor::new(vec![0.0, 0.0, 10.0, 0.0], vec![4]).unwrap();
        assert!(generation_confidence(&logits).unwrap() > 0.95);
    }
}