use crate::statistical_tests::utils::convert_string;
use std::collections::HashMap;
/// Computes the digraphic index of coincidence (DIC) of `text`, scaled by 1000.
///
/// The text is first passed through `convert_string`; every pair of adjacent
/// symbols in the result is then counted as one digraph. Pairs overlap, so a
/// three-symbol sequence `x y z` contributes both `(x, y)` and `(y, z)`.
///
/// With `N` digraphs in total and `c` occurrences of each distinct digraph,
/// the returned value is `1000 * sum(c * (c - 1)) / (N * (N - 1))`.
///
/// Returns `0.0` when the converted text yields fewer than two digraphs
/// (i.e. fewer than three symbols), because the denominator would be zero.
pub fn get_dic(text: &str) -> f64 {
    let data = convert_string(text);

    // Bail out before doing any counting: with N <= 1 digraphs the
    // N * (N - 1) denominator below would be zero.
    if data.len() < 3 {
        return 0.0;
    }
    // Overlapping pairs: a sequence of `len` symbols has `len - 1` digraphs.
    let total_digraphs = data.len() - 1;

    // Explicit u64 counts; an untyped `0` literal would fall back to i32.
    let mut digraph_counts: HashMap<_, u64> = HashMap::new();
    for i in 1..data.len() {
        *digraph_counts.entry((data[i - 1], data[i])).or_default() += 1;
    }

    // Number of ordered pairs of identical digraphs, summed over every
    // distinct digraph.
    let sum: f64 = digraph_counts
        .values()
        .map(|&count| {
            let c = count as f64;
            c * (c - 1.0)
        })
        .sum();

    let total = total_digraphs as f64;
    let dic = sum / (total * (total - 1.0));
    dic * 1000.0
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Input built from one repeating two-letter pattern: expects a score
    /// above 100.
    #[test]
    fn test_dic_repeated_digraphs() {
        let score = get_dic("ABABABABAB");
        assert!(score > 100.0);
    }

    /// Input in which no letter repeats: expects a score below 100.
    #[test]
    fn test_dic_varied_digraphs() {
        let score = get_dic("ABCDEFGHIJ");
        assert!(score < 100.0);
    }

    /// A single-character input cannot form a digraph: expects exactly 0.0.
    #[test]
    fn test_dic_short_text() {
        assert_eq!(get_dic("A"), 0.0);
    }
}