/// Result of comparing a compressed token count against an entropy-based estimate.
///
/// The field descriptions below reflect how `measure_quality` populates the struct;
/// all fields are public, so other code may construct values differently.
#[derive(Debug, Clone)]
pub struct CompressionQuality {
    /// Byte-level Shannon entropy of the input, in bits per byte.
    pub input_entropy: f64,
    /// Estimated minimum token count, capped at the original token count.
    pub theoretical_min_tokens: u32,
    /// Token count of the compressed output that was measured.
    pub actual_tokens: u32,
    /// Ratio of `theoretical_min_tokens` to `actual_tokens`, capped at `1.0`.
    pub efficiency: f64,
    /// `actual_tokens - theoretical_min_tokens`, saturating at zero.
    pub headroom_tokens: u32,
    /// Coarse bucket derived from `efficiency`.
    pub grade: QualityGrade,
}
/// Coarse quality bucket for a compression result.
///
/// `measure_quality` picks the variant from the efficiency ratio using the
/// thresholds noted on each variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum QualityGrade {
    /// Efficiency of at least 0.9.
    Excellent,
    /// Efficiency of at least 0.7 but below 0.9.
    Good,
    /// Efficiency of at least 0.5 but below 0.7.
    Fair,
    /// Efficiency below 0.5.
    Poor,
}
impl QualityGrade {
pub fn as_str(&self) -> &'static str {
match self {
Self::Excellent => "excellent",
Self::Good => "good",
Self::Fair => "fair",
Self::Poor => "poor",
}
}
}
/// Scores a compressed token count against an entropy-based estimate of the minimum.
///
/// The estimate is `ceil(entropy_bits_per_byte * input.len() / 32)`, i.e. every token
/// is assumed to carry 32 bits, and it is capped at `tokens_original`.
///
/// * `input` - text whose byte-level entropy drives the estimate.
/// * `tokens_original` - token count before compression; upper bound for the estimate.
/// * `tokens_compressed` - token count after compression; reported as `actual_tokens`.
///
/// Efficiency is `estimate / tokens_compressed` capped at `1.0`, and is defined as `1.0`
/// when either the estimate or `tokens_compressed` is zero. The grade is `Excellent` at
/// 0.9 and above, `Good` at 0.7 and above, `Fair` at 0.5 and above, otherwise `Poor`.
pub fn measure_quality(
    input: &str,
    tokens_original: u32,
    tokens_compressed: u32,
) -> CompressionQuality {
    // Assumed information capacity of a single token, in bits.
    const BITS_PER_TOKEN: f64 = 32.0;

    let input_entropy = shannon_entropy_bits_per_byte(input);
    let total_bits = input_entropy * input.len() as f64;

    // A float-to-int `as` cast saturates, so an oversized estimate becomes `u32::MAX`
    // and is then pulled down by the cap below.
    let estimated_tokens = (total_bits / BITS_PER_TOKEN).ceil() as u32;
    let theoretical_min_tokens = estimated_tokens.min(tokens_original);

    // A zero on either side would make the ratio meaningless; treat it as a perfect score.
    let efficiency = if tokens_compressed == 0 || theoretical_min_tokens == 0 {
        1.0
    } else {
        (f64::from(theoretical_min_tokens) / f64::from(tokens_compressed)).min(1.0)
    };

    let grade = match efficiency {
        ratio if ratio >= 0.9 => QualityGrade::Excellent,
        ratio if ratio >= 0.7 => QualityGrade::Good,
        ratio if ratio >= 0.5 => QualityGrade::Fair,
        _ => QualityGrade::Poor,
    };

    CompressionQuality {
        input_entropy,
        theoretical_min_tokens,
        actual_tokens: tokens_compressed,
        efficiency,
        headroom_tokens: tokens_compressed.saturating_sub(theoretical_min_tokens),
        grade,
    }
}
/// Computes the zero-order Shannon entropy of `text`, in bits per byte.
///
/// The distribution is taken over the UTF-8 *bytes* of `text`, not over its
/// characters, so a multi-byte character contributes several symbols.
///
/// Returns `0.0` for an empty string and for input made of one repeated byte;
/// the result never exceeds `8.0` because there are only 256 byte values.
fn shannon_entropy_bits_per_byte(text: &str) -> f64 {
    let bytes = text.as_bytes();
    if bytes.is_empty() {
        return 0.0;
    }

    // `usize` counters cannot overflow: no single count can exceed `bytes.len()`.
    // Narrower counters would panic (debug) or wrap (release) on very large inputs.
    let mut histogram = [0usize; 256];
    for &byte in bytes {
        histogram[usize::from(byte)] += 1;
    }

    let total = bytes.len() as f64;
    // Start from +0.0 and subtract, so an all-zero sum stays +0.0 rather than -0.0.
    histogram
        .iter()
        .filter(|&&count| count > 0)
        .fold(0.0_f64, |entropy, &count| {
            let p = count as f64 / total;
            entropy - p * p.log2()
        })
}
pub fn format_quality_report(q: &CompressionQuality) -> String {
format!(
"entropy: {:.2} bits/byte | min: {} tokens | actual: {} tokens | \
efficiency: {:.0}% | headroom: {} tokens | grade: {}",
q.input_entropy,
q.theoretical_min_tokens,
q.actual_tokens,
q.efficiency * 100.0,
q.headroom_tokens,
q.grade.as_str(),
)
}
#[cfg(test)]
mod tests {
    use super::*;
    use proptest::prelude::*;

    /// Empty input with zero token counts yields zero entropy and a perfect score.
    #[test]
    fn test_empty_input() {
        let quality = measure_quality("", 0, 0);
        assert_eq!(quality.input_entropy, 0.0);
        assert_eq!(quality.efficiency, 1.0);
        assert_eq!(quality.grade, QualityGrade::Excellent);
    }

    /// Equal token counts before and after keep efficiency and headroom within bounds.
    #[test]
    fn test_no_compression() {
        let text = "hello world this is a test string with some content";
        let token_count: u32 = 12;
        let quality = measure_quality(text, token_count, token_count);
        assert!(quality.efficiency <= 1.0);
        assert!(quality.headroom_tokens <= token_count);
    }

    /// A single repeated byte carries no entropy, so any result grades as excellent.
    #[test]
    fn test_perfect_compression() {
        let quality = measure_quality("aaaa", 1, 1);
        assert_eq!(quality.grade, QualityGrade::Excellent);
    }

    /// Cycling through all 26 lowercase letters produces more than 3 bits per byte.
    #[test]
    fn test_high_entropy_input() {
        let text: String = (0u8..200)
            .map(|offset| char::from(b'a' + offset % 26))
            .collect();
        let quality = measure_quality(&text, 50, 50);
        assert!(
            quality.input_entropy > 3.0,
            "high entropy input should have entropy > 3"
        );
    }

    /// Efficiency stays inside `[0, 1]` for an ordinary input.
    #[test]
    fn test_efficiency_bounded() {
        let quality = measure_quality("test content here", 5, 3);
        assert!((0.0..=1.0).contains(&quality.efficiency));
    }

    /// A one-byte input compressed to one token lands in the top grade.
    #[test]
    fn test_grade_thresholds() {
        let quality = measure_quality("a", 100, 1);
        assert_eq!(quality.grade, QualityGrade::Excellent);
    }

    /// The report contains the entropy, efficiency and grade labels.
    #[test]
    fn test_format_quality_report() {
        let quality = measure_quality("hello world test content", 6, 4);
        let report = format_quality_report(&quality);
        for label in ["entropy:", "efficiency:", "grade:"] {
            assert!(report.contains(label));
        }
    }

    /// One repeated byte has exactly zero entropy.
    #[test]
    fn test_shannon_entropy_single_char() {
        assert_eq!(shannon_entropy_bits_per_byte("aaaa"), 0.0);
    }

    /// Sixteen distinct bytes have higher entropy than one repeated byte.
    #[test]
    fn test_shannon_entropy_varied() {
        let uniform = shannon_entropy_bits_per_byte("aaaa");
        let varied = shannon_entropy_bits_per_byte("abcdefghijklmnop");
        assert!(varied > uniform);
    }

    proptest! {
        // Efficiency must stay within [0, 1] for arbitrary lowercase text and token counts,
        // with the compressed count clamped so it never exceeds the original.
        #[test]
        fn prop_efficiency_bounded(
            text in "[a-z ]{10,200}",
            original in 5u32..=100u32,
            compressed in 1u32..=100u32,
        ) {
            let clamped = compressed.min(original);
            let quality = measure_quality(&text, original, clamped);
            prop_assert!(
                quality.efficiency >= 0.0 && quality.efficiency <= 1.0,
                "efficiency out of bounds: {}",
                quality.efficiency
            );
        }

        // Entropy is never negative for any non-empty string.
        #[test]
        fn prop_entropy_non_negative(text in ".{1,100}") {
            let e = shannon_entropy_bits_per_byte(&text);
            prop_assert!(e >= 0.0, "entropy should be non-negative: {e}");
        }
    }
}