use crate::statistical_tests::utils::convert_string;
use std::collections::HashMap;
/// Computes the even digraphic index of coincidence of `text`, scaled by 1000.
///
/// The input is first passed through `convert_string`. The resulting symbols
/// are grouped into non-overlapping pairs that start at even offsets
/// (0-1, 2-3, ...); a trailing unpaired symbol is ignored. With `f` the number
/// of occurrences of each distinct pair and `N` the total number of pairs, the
/// value returned is `1000 * sum(f * (f - 1)) / (N * (N - 1))`.
///
/// Returns `0.0` when the converted text holds fewer than four symbols.
pub fn get_even_dic(text: &str) -> f64 {
    let symbols = convert_string(text);
    let symbol_count = symbols.len();
    if symbol_count < 4 {
        return 0.0;
    }

    // Walk the even offsets that still have a right-hand neighbour.
    let even_pairs = (0..symbol_count - 1)
        .step_by(2)
        .map(|start| (symbols[start], symbols[start + 1]));

    let mut pair_total: i32 = 0;
    let mut occurrences = HashMap::new();
    for pair in even_pairs {
        *occurrences.entry(pair).or_insert(0_i32) += 1;
        pair_total += 1;
    }

    // Defensive guard against a zero denominator; the length check above
    // already guarantees at least two pairs.
    if pair_total <= 1 {
        return 0.0;
    }

    // Numerator: sum of f * (f - 1) over every distinct pair.
    let coincidences = occurrences.values().fold(0.0, |acc, &hits| {
        let hits = hits as f64;
        acc + hits * (hits - 1.0)
    });

    let pairs = pair_total as f64;
    let ratio = coincidences / (pairs * (pairs - 1.0));
    ratio * 1000.0
}
#[cfg(test)]
mod tests {
    use super::*;

    // Sanity check: an input with repeated letter groups yields a
    // non-negative score.
    #[test]
    fn test_edi_repeated_even_digraphs() {
        let score = get_even_dic("ABABCDCD");
        assert!(score >= 0.0);
    }

    // Sanity check: an input made of all-different letters yields a
    // non-negative score.
    #[test]
    fn test_edi_varied_digraphs() {
        let score = get_even_dic("ABCDEFGH");
        assert!(score >= 0.0);
    }

    // A three-character input is expected to produce exactly zero.
    #[test]
    fn test_edi_short_text() {
        let score = get_even_dic("ABC");
        assert_eq!(score, 0.0);
    }
}