use itertools::Itertools;
use crate::frequency;
/// Returns the shared English letter-frequency table that contains entries
/// for both lowercase and uppercase letters.
pub fn english() -> &'static std::collections::HashMap<char, f64> {
    // Explicitly dereference the lazily initialized static to the map itself.
    &*ENGLISH_FREQUENCY
}
/// Returns the shared English letter-frequency table keyed by lowercase
/// letters only.
pub fn english_lowercase() -> &'static std::collections::HashMap<char, f64> {
    // Explicitly dereference the lazily initialized static to the map itself.
    &*ENGLISH_LOWERCASE_FREQUENCY
}
/// Returns the shared English letter-frequency table keyed by uppercase
/// letters only.
pub fn english_uppercase() -> &'static std::collections::HashMap<char, f64> {
    // Explicitly dereference the lazily initialized static to the map itself.
    &*ENGLISH_UPPERCASE_FREQUENCY
}
/// Computes the relative frequency of every character in `text`, ignoring case.
///
/// Characters are tallied by [`counts`](frequency::counts), which lowercases
/// the text first. Each tally is divided by the total number of tallied
/// characters, so the returned frequencies sum to 1 for non-empty input.
/// Empty input yields an empty map.
pub fn of(text: &str) -> std::collections::HashMap<char, f64> {
    let tallies = frequency::counts(text);
    // `text.len()` is a byte length and lowercasing can change the number of
    // characters, so the denominator is taken from the tallies themselves.
    let total = tallies.values().sum::<usize>() as f64;
    tallies
        .into_iter()
        .map(|(character, count)| (character, count as f64 / total))
        .collect()
}
/// Computes the relative frequency of every character in `text`, treating
/// uppercase and lowercase characters as distinct.
///
/// Characters are tallied by [`cased_counts`](frequency::cased_counts). Each
/// tally is divided by the total number of characters, so the returned
/// frequencies sum to 1 for non-empty input. Empty input yields an empty map.
pub fn of_cased(text: &str) -> std::collections::HashMap<char, f64> {
    let tallies = frequency::cased_counts(text);
    // `text.len()` counts bytes, not characters, which skews the result for
    // non-ASCII text; use the number of tallied characters instead.
    let total = tallies.values().sum::<usize>() as f64;
    tallies
        .into_iter()
        .map(|(character, count)| (character, count as f64 / total))
        .collect()
}
/// Counts how many times each character occurs in `text` after the whole
/// text has been converted to lowercase.
pub fn counts(text: &str) -> std::collections::HashMap<char, usize> {
    let lowered = text.to_lowercase();
    let mut tally = std::collections::HashMap::new();
    for character in lowered.chars() {
        *tally.entry(character).or_insert(0) += 1;
    }
    tally
}
/// Counts how many times each character occurs in `text`, keeping uppercase
/// and lowercase characters separate.
pub fn cased_counts(text: &str) -> std::collections::HashMap<char, usize> {
    let mut tally = std::collections::HashMap::new();
    for character in text.chars() {
        *tally.entry(character).or_insert(0) += 1;
    }
    tally
}
/// Rewrites `text` by substituting each distinct character with the lowercase
/// English letter whose frequency is closest to that character's frequency in
/// `text`.
///
/// Characters are processed in order of first appearance. Each English letter
/// is handed out at most once, and every later occurrence of a character
/// reuses the letter chosen for its first occurrence. Ties between equally
/// close letters are resolved in favour of the alphabetically first letter so
/// the output is deterministic.
///
/// Once all 26 letters have been assigned, any further distinct character is
/// left unchanged in the output.
pub fn mapped_to_english(text: &str) -> String {
    let mut available_frequencies = ENGLISH_LOWERCASE_FREQUENCY.clone();
    // Case-sensitive frequencies: the keys must match the characters yielded
    // by `text.chars()` below. The case-folded map has no entry for uppercase
    // characters, which previously caused a panic on lookup.
    let character_frequencies = frequency::of_cased(text);
    let mut character_map = std::collections::HashMap::new();
    text.chars()
        .map(|character| {
            *character_map.entry(character).or_insert_with(|| {
                let observed = character_frequencies
                    .get(&character)
                    .copied()
                    .unwrap_or_default();
                let closest = available_frequencies
                    .iter()
                    .map(|(letter, english)| (*letter, (english - observed).abs()))
                    .min_by(|first, other| {
                        first
                            .1
                            .total_cmp(&other.1)
                            // HashMap iteration order is unspecified; break
                            // ties on the letter to keep results stable.
                            .then_with(|| first.0.cmp(&other.0))
                    })
                    .map(|(letter, _)| letter);
                match closest {
                    Some(letter) => {
                        available_frequencies.remove(&letter);
                        letter
                    }
                    // Every English letter is already taken: pass the
                    // character through instead of panicking.
                    None => character,
                }
            })
        })
        .collect()
}
/// Returns the lowercase English letter whose table frequency is closest to
/// `frequency`.
///
/// Several letters share the same table frequency (for example `c` and `u`),
/// so ties are resolved in favour of the alphabetically first letter. Without
/// the tie-break the answer would depend on the hash map's iteration order.
pub fn closest_english_letter(frequency: f64) -> char {
    ENGLISH_LOWERCASE_FREQUENCY
        .iter()
        .map(|(letter, english_frequency)| (*letter, (english_frequency - frequency).abs()))
        .min_by(|first, other| {
            first
                .1
                .total_cmp(&other.1)
                .then_with(|| first.0.cmp(&other.0))
        })
        .expect("the English lowercase frequency table is never empty")
        .0
}
/// Scores how closely the shape of `text`'s character-frequency distribution
/// matches that of English, regardless of which character has which
/// frequency.
///
/// Both the case-folded frequencies of `text` and the English lowercase
/// frequencies are sorted from most to least frequent and compared pairwise
/// (the longer list is truncated). Each pair contributes
/// `1 - |difference| / 0.99926`, and the result is the mean of those values.
///
/// Returns `0.0` for empty `text`, where there is nothing to compare;
/// previously this case evaluated `0 / 0` and produced NaN.
pub fn distribution_score(text: &str) -> f64 {
    let frequency_map = frequency::of(text);
    let frequencies = frequency_map
        .values()
        .copied()
        .sorted_by(|item, other| other.total_cmp(item));
    let english_frequencies = ENGLISH_LOWERCASE_FREQUENCY
        .values()
        .copied()
        .sorted_by(|item, other| other.total_cmp(item));
    // 0.99926 equals 1 - 0.00074 (the smallest English frequency in the
    // table); presumably it is the largest possible gap, used to normalize
    // each term into the 0..=1 range.
    let differences: Vec<f64> = frequencies
        .zip(english_frequencies)
        .map(|(frequency, english_frequency)| 1. - (frequency - english_frequency).abs() / 0.99926)
        .collect();
    if differences.is_empty() {
        return 0.;
    }
    differences.iter().sum::<f64>() / differences.len() as f64
}
/// Scores how closely the frequency of each character in `text` matches the
/// English frequency of that same character.
///
/// Frequencies are computed case-insensitively. Characters without an entry
/// in the English table are ignored. Each remaining character contributes
/// `1 - |difference| / 0.99926`, and the result is the mean of those values.
///
/// Returns `0.0` when `text` contains no character from the English table
/// (including empty `text`); previously this case evaluated `0 / 0` and
/// produced NaN.
pub fn character_score(text: &str) -> f64 {
    let scores: Vec<f64> = frequency::of(text)
        .into_iter()
        .filter_map(|(character, frequency)| {
            ENGLISH_FREQUENCY
                .get(&character)
                .map(|english_frequency| 1. - (frequency - english_frequency).abs() / 0.99926)
        })
        .collect();
    if scores.is_empty() {
        return 0.;
    }
    scores.iter().sum::<f64>() / scores.len() as f64
}
/// Relative frequency of each letter in English text, keyed by the lowercase
/// letter. This is the single source for all three lookup tables below, so
/// the lowercase, uppercase and combined maps cannot drift apart.
const ENGLISH_LETTER_FREQUENCIES: [(char, f64); 26] = [
    ('a', 0.082),
    ('b', 0.015),
    ('c', 0.028),
    ('d', 0.043),
    ('e', 0.127),
    ('f', 0.022),
    ('g', 0.020),
    ('h', 0.061),
    ('i', 0.070),
    ('j', 0.0015),
    ('k', 0.0077),
    ('l', 0.040),
    ('m', 0.024),
    ('n', 0.067),
    ('o', 0.075),
    ('p', 0.019),
    ('q', 0.00095),
    ('r', 0.060),
    ('s', 0.063),
    ('t', 0.091),
    ('u', 0.028),
    ('v', 0.0098),
    ('w', 0.024),
    ('x', 0.0015),
    ('y', 0.020),
    ('z', 0.00074),
];

lazy_static::lazy_static! {
    /// English letter frequencies keyed by lowercase letter.
    static ref ENGLISH_LOWERCASE_FREQUENCY: std::collections::HashMap<char, f64> =
        std::collections::HashMap::from(ENGLISH_LETTER_FREQUENCIES);

    /// English letter frequencies keyed by uppercase letter.
    static ref ENGLISH_UPPERCASE_FREQUENCY: std::collections::HashMap<char, f64> =
        ENGLISH_LETTER_FREQUENCIES
            .iter()
            .map(|&(letter, frequency)| (letter.to_ascii_uppercase(), frequency))
            .collect();

    /// English letter frequencies keyed by both the lowercase and the
    /// uppercase form of each letter.
    static ref ENGLISH_FREQUENCY: std::collections::HashMap<char, f64> =
        ENGLISH_LETTER_FREQUENCIES
            .iter()
            .flat_map(|&(letter, frequency)| {
                [(letter, frequency), (letter.to_ascii_uppercase(), frequency)]
            })
            .collect();
}