#[cfg(test)]
mod tests {
use crate::*;
use ahash::AHashSet;
const ITERATIONS: usize = 1000;
/// Runs the shared randomness checks for `Lang::De`.
///
/// Only compiled when the `de` feature is enabled.
#[cfg(feature = "de")]
#[test]
fn test_german_randomness() {
    let lang = Lang::De;
    test_language_randomness(lang);
}
/// Runs the shared randomness checks for `Lang::En`.
///
/// Only compiled when the `en` feature is enabled.
#[cfg(feature = "en")]
#[test]
fn test_english_randomness() {
    let lang = Lang::En;
    test_language_randomness(lang);
}
/// Runs the shared randomness checks for `Lang::Es`.
///
/// Only compiled when the `es` feature is enabled.
#[cfg(feature = "es")]
#[test]
fn test_spanish_randomness() {
    let lang = Lang::Es;
    test_language_randomness(lang);
}
/// Runs the shared randomness checks for `Lang::Fr`.
///
/// Only compiled when the `fr` feature is enabled.
#[cfg(feature = "fr")]
#[test]
fn test_french_randomness() {
    let lang = Lang::Fr;
    test_language_randomness(lang);
}
/// Runs the shared randomness checks for `Lang::Ja`.
///
/// Only compiled when the `ja` feature is enabled.
#[cfg(feature = "ja")]
#[test]
fn test_japanese_randomness() {
    let lang = Lang::Ja;
    test_language_randomness(lang);
}
/// Runs the shared randomness checks for `Lang::Ru`.
///
/// Only compiled when the `ru` feature is enabled.
#[cfg(feature = "ru")]
#[test]
fn test_russian_randomness() {
    let lang = Lang::Ru;
    test_language_randomness(lang);
}
/// Runs the shared randomness checks for `Lang::Zh`.
///
/// Only compiled when the `zh` feature is enabled.
#[cfg(feature = "zh")]
#[test]
fn test_chinese_randomness() {
    let lang = Lang::Zh;
    test_language_randomness(lang);
}
/// Shared randomness checks for a single language.
///
/// Draws `ITERATIONS` words with `get(lang)` and verifies that:
/// - `all(lang)` contains at least 100,000 words,
/// - every drawn word is non-empty and present in `all(lang)`,
/// - more than a tenth of the draws produced distinct words,
/// - there were fewer than three back-to-back repeats of the same word.
///
/// Afterwards the word-distribution and Hamming-distance checks are run.
///
/// # Panics
/// Panics (failing the calling test) as soon as one of the checks does not hold.
fn test_language_randomness(lang: Lang) {
    let words = all(lang);
    let total_words = words.len();
    assert!(total_words >= 100000, "Less than 100,000 words available for {:?}", lang);

    let mut seen: AHashSet<&'static str> = AHashSet::new();
    let mut consecutive_duplicates = 0;
    let mut max_consecutive_duplicates = 0;
    // Drawn words are asserted to be non-empty, so "" can never match the first draw.
    let mut last_word = "";

    // Draw exactly ITERATIONS samples: the duplicate count, the duplicate ratio, the
    // uniqueness threshold and the log line below are all expressed relative to
    // ITERATIONS, so sampling fewer words would skew every one of them.
    for i in 0..ITERATIONS {
        let word = get(lang);
        assert!(!word.is_empty(), "Empty word found for {:?} at iteration {}", lang, i);
        assert!(words.contains(&word), "Word '{}' not found in dictionary for {:?}", word, lang);
        seen.insert(word);

        if word == last_word {
            consecutive_duplicates += 1;
            max_consecutive_duplicates = max_consecutive_duplicates.max(consecutive_duplicates);
        } else {
            consecutive_duplicates = 0;
        }
        last_word = word;
    }

    let unique_words_seen = seen.len();
    // Cannot underflow: at most one new unique word is recorded per iteration.
    let duplicate_count = ITERATIONS - unique_words_seen;
    let duplicate_ratio = duplicate_count as f64 / ITERATIONS as f64;
    eprintln!("{:?} - Total words: {}, Iterations: {}, Unique seen: {}, Duplicates: {:.2}%, Max consecutive: {}",
        lang, total_words, ITERATIONS, unique_words_seen, duplicate_ratio * 100.0, max_consecutive_duplicates);

    assert!(unique_words_seen > ITERATIONS / 10,
        "Poor randomness: only {} unique words out of {} iterations for {:?}",
        unique_words_seen, ITERATIONS, lang);
    assert!(max_consecutive_duplicates < 3,
        "Too many consecutive duplicates: {} for {:?}",
        max_consecutive_duplicates, lang);

    // Always true after the size assertion above; kept as a guard in case that bound is relaxed.
    if total_words > 1000 {
        test_word_distribution(lang, 5000);
    }
    test_hamming_distance_distribution(lang, 1000);
}
/// Checks that `sample_size` draws for `lang` are spread evenly over its word list.
///
/// Counts how often each word of `words::get(lang)` is returned by `get(lang)` and
/// compares the counts with the uniform expectation `sample_size / word_count`:
/// - the chi-squared statistic divided by the number of words must stay below 2.0,
/// - fewer than a tenth of the words may be "underrepresented", i.e. never drawn
///   although each word was expected at least `MIN_EXPECTED_FOR_ABSENCE` times.
///
/// # Panics
/// Panics when the word list is empty or when one of the two checks fails.
fn test_word_distribution(lang: Lang, sample_size: usize) {
    // With a small expected count per word, most words are legitimately absent from
    // the sample, so absence says nothing about the generator. Only treat a missing
    // word as underrepresented once it was expected several times (5 is the usual
    // rule-of-thumb minimum expected count for chi-squared style checks).
    const MIN_EXPECTED_FOR_ABSENCE: f64 = 5.0;

    let words = words::get(lang);
    let word_count = words.len();
    // Guard explicitly: an empty list would otherwise turn the statistics below into
    // NaN and fail with a misleading "poor distribution" message.
    assert!(word_count > 0, "No words available for {:?}", lang);

    let mut word_frequency = ahash::AHashMap::new();
    for _ in 0..sample_size {
        let word = get(lang);
        *word_frequency.entry(word).or_insert(0) += 1;
    }

    let expected_frequency = sample_size as f64 / word_count as f64;
    let absence_is_meaningful = expected_frequency >= MIN_EXPECTED_FOR_ABSENCE;
    let mut underrepresented = 0;
    let mut chi_squared = 0.0;
    for word in words.iter() {
        let count = word_frequency.get(word).copied().unwrap_or(0);
        let deviation = count as f64 - expected_frequency;
        chi_squared += (deviation * deviation) / expected_frequency;
        if absence_is_meaningful && count == 0 {
            underrepresented += 1;
        }
    }

    let chi_squared_per_word = chi_squared / word_count as f64;
    eprintln!("{:?} - Distribution chi-squared per word: {:.3}", lang, chi_squared_per_word);
    assert!(chi_squared_per_word < 2.0,
        "Poor distribution for {:?}: chi-squared per word = {:.3}",
        lang, chi_squared_per_word);
    assert!(underrepresented < word_count / 10,
        "Too many underrepresented words in {:?}: {} out of {}",
        lang, underrepresented, word_count);
}
/// Samples `iterations` consecutive word pairs for `lang` and inspects how much
/// neighbouring draws differ.
///
/// Identical neighbours are only counted; for differing neighbours the result of
/// `hamming_distance` is recorded. If at least one distance was recorded, the
/// average/min/max are logged, and when the average exceeds 2.0 the minimum must be
/// positive and the maximum at least 3.
fn test_hamming_distance_distribution(lang: Lang, iterations: usize) {
    let mut previous = get(lang);
    let mut repeats = 0;
    let mut distances = Vec::new();

    for _ in 0..iterations {
        let next = get(lang);
        if next != previous {
            distances.push(hamming_distance(previous, next));
        } else {
            repeats += 1;
        }
        previous = next;
    }

    // Nothing to analyse when every draw repeated its predecessor (or no draws were made).
    if distances.is_empty() {
        return;
    }

    let total: usize = distances.iter().sum();
    let mean = total as f64 / distances.len() as f64;
    let smallest = distances.iter().copied().min().unwrap_or(0);
    let largest = distances.iter().copied().max().unwrap_or(0);
    eprintln!("{:?} - Hamming distance: avg={:.2}, min={}, max={}, same={}",
        lang, mean, smallest, largest, repeats);

    // The variation checks only apply when words differ noticeably on average.
    if mean > 2.0 {
        assert!(smallest > 0, "No character differences found in {:?}", lang);
        assert!(largest >= 3, "Insufficient character variation in {:?}", lang);
    }
}
/// Character-level distance between two words.
///
/// Counts the positions at which the `char`s of `a` and `b` differ and adds the
/// difference in character count, so every surplus character of the longer word
/// contributes one.
///
/// Lengths are measured in `char`s rather than bytes: `str::len` returns the UTF-8
/// byte length, which would inflate the distance for non-ASCII words.
fn hamming_distance(a: &str, b: &str) -> usize {
    let mismatches = a
        .chars()
        .zip(b.chars())
        .filter(|(c1, c2)| c1 != c2)
        .count();
    // `zip` stops at the shorter word; account for the unmatched tail in characters.
    let length_gap = a.chars().count().abs_diff(b.chars().count());
    mismatches + length_gap
}
}