use crate::statistical_tests::utils::convert_string;
use std::collections::HashMap;
/// Scores how far the letter distribution of `text` deviates from English.
///
/// `text` is passed through `convert_string`; every resulting symbol below 26
/// is tallied and compared against a 26-entry table of English letter
/// frequencies using a chi-squared sum, `Σ (observed - expected)² / expected`.
/// The sum is multiplied by 100 before being returned.
///
/// Returns `0.0` when `convert_string` yields nothing, or when none of the
/// symbols it yields is below 26.
///
/// NOTE(review): the table is indexed by symbol value, so this presumably
/// expects `convert_string` to map 'A'..'Z' to 0..25 — confirm against that
/// helper.
pub fn get_ldi(text: &str) -> f64 {
    // Relative frequency of each letter in English, in alphabet order.
    let expected_share: [f64; 26] = [
        0.082, 0.015, 0.028, 0.043, 0.127, 0.022, 0.020, 0.061, 0.070, 0.002,
        0.008, 0.040, 0.024, 0.067, 0.075, 0.019, 0.001, 0.060, 0.063, 0.091,
        0.028, 0.010, 0.023, 0.001, 0.020, 0.001,
    ];

    let symbols = convert_string(text);
    if symbols.is_empty() {
        return 0.0;
    }

    // Tally only the symbols that have a slot in the frequency table.
    let mut histogram = HashMap::new();
    let mut letter_total = 0;
    for &symbol in &symbols {
        if symbol >= 26 {
            continue;
        }
        *histogram.entry(symbol).or_insert(0) += 1;
        letter_total += 1;
    }
    if letter_total == 0 {
        return 0.0;
    }

    let sample_size = letter_total as f64;
    let chi_squared = expected_share
        .iter()
        .enumerate()
        .map(|(letter, &share)| {
            // Letters that never occurred contribute an observed count of 0.
            let seen = histogram.get(&letter).copied().unwrap_or(0) as f64;
            (seen, share * sample_size)
        })
        // Skip any slot with no expected mass to avoid dividing by zero.
        .filter(|&(_, predicted)| predicted > 0.0)
        .fold(0.0_f64, |acc, (seen, predicted)| {
            let gap = seen - predicted;
            acc + (gap * gap) / predicted
        });

    chi_squared * 100.0
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A pangram must produce a non-negative score.
    #[test]
    fn test_ldi_english_text() {
        let score = get_ldi("THEQUICKBROWNFOXJUMPSOVERTHELAZYDOG");
        assert!(score >= 0.0);
    }

    /// Twenty copies of a single letter must produce a strictly positive score.
    #[test]
    fn test_ldi_non_english_distribution() {
        let score = get_ldi("AAAAAAAAAAAAAAAAAAAA");
        assert!(score > 0.0);
    }

    /// Empty input must score exactly zero.
    #[test]
    fn test_ldi_empty_text() {
        let score = get_ldi("");
        assert_eq!(score, 0.0);
    }
}