use std::collections::HashMap;

// Assumed context: unit structs TokenAnalyzer/EntropyCalculator declared above; alias inferred from to_vector.
type TokenVector = HashMap<String, f64>;

impl TokenAnalyzer {
    fn new() -> Self {
        Self
    }

    /// Splits on whitespace and lowercases each token.
    fn tokenize(&self, text: &str) -> Vec<String> {
        text.split_whitespace().map(str::to_lowercase).collect()
    }

    /// Builds a term-frequency vector: each token's weight is its count
    /// divided by the total number of tokens.
    fn to_vector(&self, tokens: &[String]) -> TokenVector {
        let mut vector = HashMap::new();
        let total = tokens.len() as f64;
        for token in tokens {
            *vector.entry(token.clone()).or_insert(0.0) += 1.0 / total;
        }
        vector
    }

    /// Cosine similarity between two sparse vectors:
    /// dot(v1, v2) / (|v1| * |v2|), or 0.0 when either norm is zero.
    fn cosine_similarity(&self, v1: &TokenVector, v2: &TokenVector) -> f64 {
        let mut dot_product = 0.0;
        let mut norm1 = 0.0;
        let mut norm2 = 0.0;
        for (token, weight1) in v1 {
            norm1 += weight1 * weight1;
            // Only tokens present in both vectors contribute to the dot product.
            if let Some(weight2) = v2.get(token) {
                dot_product += weight1 * weight2;
            }
        }
        for weight2 in v2.values() {
            norm2 += weight2 * weight2;
        }
        if norm1 > 0.0 && norm2 > 0.0 {
            dot_product / (norm1.sqrt() * norm2.sqrt())
        } else {
            0.0
        }
    }
}
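
// Hedged usage sketch (not part of the original): a unit test exercising the
// pipeline above. "the cat sat" and "the cat ran" share two of three equally
// weighted tokens, so the expected cosine similarity is 2/3.
#[cfg(test)]
mod token_analyzer_tests {
    use super::*;

    #[test]
    fn similarity_of_overlapping_sentences() {
        let analyzer = TokenAnalyzer::new();
        let v1 = analyzer.to_vector(&analyzer.tokenize("The cat sat"));
        let v2 = analyzer.to_vector(&analyzer.tokenize("The cat ran"));
        let sim = analyzer.cosine_similarity(&v1, &v2);
        assert!((sim - 2.0 / 3.0).abs() < 1e-9);
    }
}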

impl EntropyCalculator {
    fn new() -> Self {
        Self
    }

    /// Shannon entropy of the character distribution of `text`, in bits:
    /// -sum(p * log2(p)) over each distinct character's probability p.
    fn calculate(&self, text: &str) -> f64 {
        let mut char_counts = HashMap::new();
        // Count chars, not bytes: `text.len()` is the byte length and
        // would overstate the total for multi-byte UTF-8 text.
        let total = text.chars().count() as f64;
        for ch in text.chars() {
            *char_counts.entry(ch).or_insert(0) += 1;
        }
        let mut entropy = 0.0;
        for count in char_counts.values() {
            // Every stored count is at least 1, so the probability is
            // always positive and log2 is well-defined.
            let probability = f64::from(*count) / total;
            entropy -= probability * probability.log2();
        }
        entropy
    }
}
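
// Hedged usage sketch (not part of the original): "aabb" has two symbols with
// probability 0.5 each, so its entropy is exactly 1 bit, while a constant
// string carries 0 bits.
#[cfg(test)]
mod entropy_tests {
    use super::*;

    #[test]
    fn entropy_of_uniform_and_constant_strings() {
        let calc = EntropyCalculator::new();
        assert!((calc.calculate("aabb") - 1.0).abs() < 1e-9);
        assert!(calc.calculate("aaaa").abs() < 1e-9);
    }
}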