use crate::{EntityType, Model};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
/// Ethnicity label attached to every `NameExample`.
///
/// `DemographicBiasEvaluator::evaluate_ner` groups recognition rates by the
/// `Debug` name of the variant (for example `"EastAsian"`), so renaming a
/// variant also renames the corresponding key in the results.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum Ethnicity {
European,
AfricanAmerican,
Hispanic,
EastAsian,
SouthAsian,
MiddleEastern,
African,
Indigenous,
}
/// Geographic region label attached to every `LocationExample`.
///
/// `DemographicBiasEvaluator::evaluate_locations` groups recognition rates by
/// the `Debug` name of the variant (for example `"WesternEurope"`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum Region {
NorthAmerica,
WesternEurope,
EasternEurope,
EastAsia,
SouthAsia,
SoutheastAsia,
MiddleEast,
Africa,
LatinAmerica,
Oceania,
}
/// Writing system a name or location example is spelled in.
///
/// Recognition rates are grouped by the `Debug` name of the variant. The
/// `"Latin"` group is the baseline that the other scripts are compared against
/// when `DemographicBiasEvaluator::evaluate_ner` computes the script bias gap.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum Script {
Latin,
Cyrillic,
Arabic,
Chinese,
Japanese,
Korean,
Devanagari,
Thai,
Greek,
Hebrew,
}
/// A single person name used as a probe when measuring demographic bias.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NameExample {
/// Full name; `NameExample::new` builds it as `"<first_name> <last_name>"`.
pub name: String,
/// Given name. The evaluator counts a name as recognized when a person
/// entity's text contains this string.
pub first_name: String,
/// Family name.
pub last_name: String,
/// Ethnicity group the name is attributed to.
pub ethnicity: Ethnicity,
/// Writing system the name is spelled in.
pub script: Script,
/// Gender association of the name; examples with `None` are left out of
/// the gender and intersectional breakdowns.
pub gender: Option<Gender>,
/// How common the name is.
pub frequency: NameFrequency,
}
pub use anno_core::Gender;
/// Coarse bucket describing how common a name is.
///
/// When frequency weighting is enabled, `DemographicBiasEvaluator::evaluate_ner`
/// maps these buckets to the weights 0.5 (`Common`), 0.3 (`Moderate`) and
/// 0.2 (`Rare`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum NameFrequency {
Common,
Moderate,
Rare,
}
impl NameExample {
    /// Builds a name example from its parts.
    ///
    /// The full `name` is the given name and the family name separated by a
    /// single space, in that order; all other arguments are stored unchanged.
    pub fn new(
        first_name: &str,
        last_name: &str,
        ethnicity: Ethnicity,
        script: Script,
        gender: Option<Gender>,
        frequency: NameFrequency,
    ) -> Self {
        let name = [first_name, last_name].join(" ");
        let first_name = String::from(first_name);
        let last_name = String::from(last_name);
        Self {
            name,
            first_name,
            last_name,
            ethnicity,
            script,
            gender,
            frequency,
        }
    }
}
/// A single place name used as a probe when measuring regional bias.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LocationExample {
/// Place name as it is inserted into the probe sentence; the evaluator
/// counts it as recognized when a location entity's text contains it.
pub name: String,
/// Geographic region the place belongs to.
pub region: Region,
/// Writing system the place name is spelled in.
pub script: Script,
/// Kind of place (city, country, ...).
pub location_type: LocationType,
}
/// Kind of place a `LocationExample` names.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum LocationType {
City,
Country,
SubnationalRegion,
Landmark,
}
impl LocationExample {
    /// Builds a location example, taking an owned copy of `name` and storing
    /// the remaining arguments unchanged.
    pub fn new(name: &str, region: Region, script: Script, location_type: LocationType) -> Self {
        let name = String::from(name);
        Self {
            name,
            region,
            script,
            location_type,
        }
    }
}
/// Outcome of `DemographicBiasEvaluator::evaluate_ner`.
///
/// All rates are fractions in `[0, 1]`: recognized examples divided by tested
/// examples within the group.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DemographicBiasResults {
/// Recognized names divided by all tested names; `0.0` when no names were tested.
pub overall_recognition_rate: f64,
/// Recognition rate keyed by the `Debug` name of each `Ethnicity` seen.
pub by_ethnicity: HashMap<String, f64>,
/// Recognition rate keyed by the `Debug` name of each `Script` seen.
pub by_script: HashMap<String, f64>,
/// Recognition rate keyed by the `Debug` name of each gender seen; examples
/// without a gender are not counted here.
pub by_gender: HashMap<String, f64>,
/// Recognition rate keyed by the `Debug` name of each `NameFrequency` seen.
pub by_frequency: HashMap<String, f64>,
/// Highest minus lowest per-ethnicity rate; `0.0` with fewer than two groups.
pub ethnicity_parity_gap: f64,
/// Absolute difference between the `"Latin"` rate and the unweighted mean
/// of the rates of all other scripts.
pub script_bias_gap: f64,
/// Recognition rate keyed by `"<Ethnicity>_<Gender>"`.
pub intersectional: HashMap<String, f64>,
/// Recognition rate keyed by `"<Ethnicity>_<Gender>_<NameFrequency>"`.
pub extended_intersectional: HashMap<String, f64>,
/// Number of name examples that were evaluated.
pub total_tested: usize,
/// Per-name outcomes; only populated when the evaluator's `detailed` flag is set.
pub detailed: Vec<NameResult>,
/// Present when the configuration lists more than one evaluation seed.
pub statistical: Option<crate::eval::bias_config::StatisticalBiasResults>,
/// Present when the configuration enables frequency weighting.
pub frequency_weighted: Option<crate::eval::bias_config::FrequencyWeightedResults>,
/// Present when the configuration enables distribution validation.
pub distribution_validation: Option<crate::eval::bias_config::DistributionValidation>,
}
/// Per-name outcome recorded when detailed results are requested.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct NameResult {
/// Full name that was tested.
pub name: String,
/// Whether the model produced a person entity containing the first name.
pub recognized: bool,
/// Confidence reported for a detected person entity when the name was
/// recognized; `None` otherwise.
pub confidence: Option<f64>,
/// `Debug` name of the example's `Ethnicity`.
pub ethnicity: String,
/// `Debug` name of the example's `Script`.
pub script: String,
}
/// Outcome of `DemographicBiasEvaluator::evaluate_locations`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RegionalBiasResults {
/// Recognized locations divided by all tested locations; `0.0` when none were tested.
pub overall_recognition_rate: f64,
/// Recognition rate keyed by the `Debug` name of each `Region` seen.
pub by_region: HashMap<String, f64>,
/// Recognition rate keyed by the `Debug` name of each `Script` seen.
pub by_script: HashMap<String, f64>,
/// Highest minus lowest per-region rate; `0.0` with fewer than two regions.
pub regional_parity_gap: f64,
/// Number of location examples that were evaluated.
pub total_tested: usize,
}
/// Measures how an NER model's recognition rate varies across demographic
/// and regional groups.
#[derive(Debug, Clone, Default)]
pub struct DemographicBiasEvaluator {
/// When `true`, `evaluate_ner` also records one `NameResult` per tested name.
pub detailed: bool,
/// Settings that switch the optional statistical, frequency-weighted and
/// distribution-validation outputs of `evaluate_ner` on or off.
pub config: crate::eval::bias_config::BiasDatasetConfig,
}
impl DemographicBiasEvaluator {
    /// Creates an evaluator that uses the default `BiasDatasetConfig`.
    ///
    /// `detailed` controls whether `evaluate_ner` records per-name results.
    pub fn new(detailed: bool) -> Self {
        Self {
            detailed,
            config: crate::eval::bias_config::BiasDatasetConfig::default(),
        }
    }

    /// Creates an evaluator with an explicit configuration.
    pub fn with_config(
        detailed: bool,
        config: crate::eval::bias_config::BiasDatasetConfig,
    ) -> Self {
        Self { detailed, config }
    }

    /// Adds one tested example to the `(recognized, total)` tally stored under `key`.
    fn record_outcome(
        counts: &mut HashMap<String, (usize, usize)>,
        key: String,
        recognized: bool,
    ) {
        let entry = counts.entry(key).or_insert((0, 0));
        entry.1 += 1;
        if recognized {
            entry.0 += 1;
        }
    }

    /// Converts `(recognized, total)` tallies into recognition rates.
    ///
    /// A group with a zero total maps to `0.0` instead of dividing by zero.
    fn counts_to_rates(counts: &HashMap<String, (usize, usize)>) -> HashMap<String, f64> {
        counts
            .iter()
            .map(|(key, &(correct, total))| {
                let rate = if total > 0 {
                    correct as f64 / total as f64
                } else {
                    0.0
                };
                (key.clone(), rate)
            })
            .collect()
    }

    /// Runs `model` on one generated sentence per name and aggregates how often
    /// the name is recognized as a person, broken down by ethnicity, script,
    /// gender, frequency and their intersections.
    ///
    /// A name counts as recognized when the model returns a `Person` entity
    /// whose text contains the example's first name. Extraction errors are
    /// treated as "nothing recognized" rather than aborting the evaluation.
    ///
    /// The optional `statistical`, `frequency_weighted` and
    /// `distribution_validation` outputs are only computed when the
    /// corresponding configuration switches are set.
    pub fn evaluate_ner(&self, model: &dyn Model, names: &[NameExample]) -> DemographicBiasResults {
        let mut by_ethnicity: HashMap<String, (usize, usize)> = HashMap::new();
        let mut by_script: HashMap<String, (usize, usize)> = HashMap::new();
        let mut by_gender: HashMap<String, (usize, usize)> = HashMap::new();
        let mut by_frequency: HashMap<String, (usize, usize)> = HashMap::new();
        let mut intersectional: HashMap<String, (usize, usize)> = HashMap::new();
        let mut extended_intersectional: HashMap<String, (usize, usize)> = HashMap::new();
        let mut detailed_results = Vec::new();
        let mut total_recognized = 0usize;
        let mut recognized_flags = Vec::with_capacity(names.len());
        let mut name_strings = Vec::with_capacity(names.len());

        for name_example in names {
            let text = create_realistic_sentence(&name_example.name);
            // Best effort: a failed extraction is scored as "not recognized".
            let entities = model.extract_entities(&text, None).unwrap_or_default();

            // Keep the entity that actually matched so the reported confidence
            // belongs to it and not to some other person entity in the sentence.
            let matched = entities.iter().find(|e| {
                e.entity_type == EntityType::Person
                    && e.extract_text(&text).contains(&name_example.first_name)
            });
            let recognized = matched.is_some();
            let confidence = matched.map(|e| e.confidence);

            if recognized {
                total_recognized += 1;
            }
            recognized_flags.push(recognized);
            name_strings.push(name_example.name.clone());

            let eth_key = format!("{:?}", name_example.ethnicity);
            Self::record_outcome(&mut by_ethnicity, eth_key.clone(), recognized);

            let script_key = format!("{:?}", name_example.script);
            Self::record_outcome(&mut by_script, script_key.clone(), recognized);

            Self::record_outcome(
                &mut by_frequency,
                format!("{:?}", name_example.frequency),
                recognized,
            );

            // Gender-dependent breakdowns only apply to examples that carry a gender.
            if let Some(gender) = name_example.gender {
                Self::record_outcome(&mut by_gender, format!("{:?}", gender), recognized);
                Self::record_outcome(
                    &mut intersectional,
                    format!("{:?}_{:?}", name_example.ethnicity, gender),
                    recognized,
                );
                Self::record_outcome(
                    &mut extended_intersectional,
                    format!(
                        "{:?}_{:?}_{:?}",
                        name_example.ethnicity, gender, name_example.frequency
                    ),
                    recognized,
                );
            }

            if self.detailed {
                detailed_results.push(NameResult {
                    name: name_example.name.clone(),
                    recognized,
                    confidence: confidence.map(|c| c.value()),
                    ethnicity: eth_key,
                    script: script_key,
                });
            }
        }

        let ethnicity_rates = Self::counts_to_rates(&by_ethnicity);
        let script_rates = Self::counts_to_rates(&by_script);
        let gender_rates = Self::counts_to_rates(&by_gender);
        let frequency_rates = Self::counts_to_rates(&by_frequency);
        let intersectional_rates = Self::counts_to_rates(&intersectional);
        let extended_intersectional_rates = Self::counts_to_rates(&extended_intersectional);

        let ethnicity_parity_gap = compute_max_gap(&ethnicity_rates);

        // Script bias: Latin rate versus the unweighted mean of every other
        // script. With no non-Latin scripts the gap collapses to zero.
        let latin_rate = script_rates.get("Latin").copied().unwrap_or(0.0);
        let non_latin_rates: Vec<f64> = script_rates
            .iter()
            .filter(|(k, _)| k.as_str() != "Latin")
            .map(|(_, v)| *v)
            .collect();
        let avg_non_latin = if non_latin_rates.is_empty() {
            latin_rate
        } else {
            non_latin_rates.iter().sum::<f64>() / non_latin_rates.len() as f64
        };
        let script_bias_gap = (latin_rate - avg_non_latin).abs();

        let frequency_weighted = if self.config.frequency_weighted {
            // Weight per full name; a later example with the same full name
            // overwrites the weight of an earlier one.
            let mut frequencies = HashMap::new();
            for name_example in names {
                let freq = match name_example.frequency {
                    NameFrequency::Common => 0.5,
                    NameFrequency::Moderate => 0.3,
                    NameFrequency::Rare => 0.2,
                };
                frequencies.insert(name_example.name.clone(), freq);
            }
            Some(crate::eval::bias_config::FrequencyWeightedResults::new(
                &recognized_flags,
                &frequencies,
                &name_strings,
            ))
        } else {
            None
        };

        let statistical = if self.config.evaluation_seeds.len() > 1 {
            // Only a single run is performed here, so the statistics are built
            // from the one overall recognition rate.
            let values = vec![total_recognized as f64 / names.len().max(1) as f64];
            Some(
                crate::eval::bias_config::StatisticalBiasResults::from_values(
                    &values,
                    self.config.confidence_level,
                ),
            )
        } else {
            None
        };

        let distribution_validation = if self.config.validate_distributions {
            Some(validate_demographic_distribution(&ethnicity_rates))
        } else {
            None
        };

        DemographicBiasResults {
            overall_recognition_rate: if names.is_empty() {
                0.0
            } else {
                total_recognized as f64 / names.len() as f64
            },
            by_ethnicity: ethnicity_rates,
            by_script: script_rates,
            by_gender: gender_rates,
            by_frequency: frequency_rates,
            ethnicity_parity_gap,
            script_bias_gap,
            intersectional: intersectional_rates,
            extended_intersectional: extended_intersectional_rates,
            total_tested: names.len(),
            detailed: detailed_results,
            statistical,
            frequency_weighted,
            distribution_validation,
        }
    }

    /// Runs `model` on one generated sentence per location and aggregates how
    /// often the place is recognized, broken down by region and script.
    ///
    /// A location counts as recognized when the model returns a `Location`
    /// entity whose text contains the full place name. Extraction errors are
    /// treated as "nothing recognized".
    pub fn evaluate_locations(
        &self,
        model: &dyn Model,
        locations: &[LocationExample],
    ) -> RegionalBiasResults {
        let mut by_region: HashMap<String, (usize, usize)> = HashMap::new();
        let mut by_script: HashMap<String, (usize, usize)> = HashMap::new();
        let mut total_recognized = 0usize;

        for loc in locations {
            let text = create_realistic_location_sentence(&loc.name);
            // Best effort: a failed extraction is scored as "not recognized".
            let entities = model.extract_entities(&text, None).unwrap_or_default();
            let recognized = entities.iter().any(|e| {
                e.entity_type == EntityType::Location && e.extract_text(&text).contains(&loc.name)
            });
            if recognized {
                total_recognized += 1;
            }
            Self::record_outcome(&mut by_region, format!("{:?}", loc.region), recognized);
            Self::record_outcome(&mut by_script, format!("{:?}", loc.script), recognized);
        }

        let region_rates = Self::counts_to_rates(&by_region);
        let script_rates = Self::counts_to_rates(&by_script);
        let regional_parity_gap = compute_max_gap(&region_rates);

        RegionalBiasResults {
            overall_recognition_rate: if locations.is_empty() {
                0.0
            } else {
                total_recognized as f64 / locations.len() as f64
            },
            by_region: region_rates,
            by_script: script_rates,
            regional_parity_gap,
            total_tested: locations.len(),
        }
    }
}
/// Returns the spread between the highest and the lowest rate in `rates`.
///
/// With fewer than two groups there is nothing to compare, so the gap is `0.0`.
fn compute_max_gap(rates: &HashMap<String, f64>) -> f64 {
    if rates.len() < 2 {
        return 0.0;
    }
    // Track both extremes in a single pass over the values.
    let (lowest, highest) = rates.values().fold(
        (f64::INFINITY, f64::NEG_INFINITY),
        |(lo, hi), &rate| (lo.min(rate), hi.max(rate)),
    );
    highest - lowest
}
/// Embeds `name` in one of a fixed set of news-style sentence templates.
///
/// The template is chosen from a hash of `name`, so the same name yields the
/// same sentence for every call made by the same build of the program.
/// Only the selected sentence is formatted; the templates themselves are
/// static `(prefix, suffix)` pairs placed around the name.
fn create_realistic_sentence(name: &str) -> String {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    // Text before and after the name for each template.
    const TEMPLATES: [(&str, &str); 20] = [
        ("", " was interviewed by the news team."),
        ("The award was presented to ", " at the ceremony."),
        ("", " published a groundbreaking research paper."),
        ("According to ", ", the project will launch next month."),
        ("", " joined the company as a senior executive."),
        ("The conference featured a keynote speech by ", "."),
        ("", " received recognition for outstanding contributions."),
        ("In a statement, ", " expressed support for the initiative."),
        ("", " was elected to the board of directors."),
        ("The research team, led by ", ", made significant discoveries."),
        ("", " announced plans to expand operations globally."),
        ("During the meeting, ", " proposed a new strategy."),
        ("", " has been appointed as the new department head."),
        ("The organization honored ", " for years of service."),
        ("", " spoke at the international summit in Geneva."),
        ("After careful consideration, ", " decided to proceed."),
        ("", " collaborated with international partners on the project."),
        ("The committee selected ", " as the recipient of the award."),
        ("", " provided expert testimony during the hearing."),
        ("In an exclusive interview, ", " discussed future plans."),
    ];

    let mut hasher = DefaultHasher::new();
    name.hash(&mut hasher);
    let hash = hasher.finish();

    // Reduce in u64 before narrowing so the choice does not depend on the
    // target's pointer width.
    let index = (hash % TEMPLATES.len() as u64) as usize;
    let (prefix, suffix) = TEMPLATES[index];
    format!("{}{}{}", prefix, name, suffix)
}
/// Assembles the full name dataset by concatenating the per-group lists in a
/// fixed order: European, African American, Hispanic, East Asian, South Asian,
/// Middle Eastern, then the list returned by `african_names`.
pub fn create_diverse_name_dataset() -> Vec<NameExample> {
    european_names()
        .into_iter()
        .chain(african_american_names())
        .chain(hispanic_names())
        .chain(east_asian_names())
        .chain(south_asian_names())
        .chain(middle_eastern_names())
        .chain(african_names())
        .collect()
}
fn european_names() -> Vec<NameExample> {
let mut names = Vec::new();
names.extend(vec![
NameExample::new(
"James",
"Smith",
Ethnicity::European,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Mary",
"Johnson",
Ethnicity::European,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"William",
"Williams",
Ethnicity::European,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Emma",
"Brown",
Ethnicity::European,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"Heinrich",
"Mueller",
Ethnicity::European,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Moderate,
),
NameExample::new(
"François",
"Dubois",
Ethnicity::European,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Moderate,
),
NameExample::new(
"Giulia",
"Rossi",
Ethnicity::European,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Moderate,
),
NameExample::new(
"Björk",
"Guðmundsdóttir",
Ethnicity::European,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Rare,
),
]);
names
}
fn african_american_names() -> Vec<NameExample> {
let mut names = Vec::new();
names.extend(vec![
NameExample::new(
"DeShawn",
"Jackson",
Ethnicity::AfricanAmerican,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Latoya",
"Williams",
Ethnicity::AfricanAmerican,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"Jamal",
"Robinson",
Ethnicity::AfricanAmerican,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Aaliyah",
"Washington",
Ethnicity::AfricanAmerican,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"Tyrone",
"Davis",
Ethnicity::AfricanAmerican,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Imani",
"Johnson",
Ethnicity::AfricanAmerican,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Moderate,
),
NameExample::new(
"Darnell",
"Thompson",
Ethnicity::AfricanAmerican,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Moderate,
),
NameExample::new(
"Shaniqua",
"Brown",
Ethnicity::AfricanAmerican,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Rare,
),
]);
names
}
fn hispanic_names() -> Vec<NameExample> {
vec![
NameExample::new(
"José",
"García",
Ethnicity::Hispanic,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"María",
"Rodriguez",
Ethnicity::Hispanic,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"Carlos",
"Martinez",
Ethnicity::Hispanic,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Isabella",
"Lopez",
Ethnicity::Hispanic,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"Diego",
"Hernandez",
Ethnicity::Hispanic,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Sofía",
"González",
Ethnicity::Hispanic,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"Javier",
"Pérez",
Ethnicity::Hispanic,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Moderate,
),
NameExample::new(
"Guadalupe",
"Sánchez",
Ethnicity::Hispanic,
Script::Latin,
Some(Gender::Neutral),
NameFrequency::Moderate,
),
]
}
fn east_asian_names() -> Vec<NameExample> {
vec![
NameExample::new(
"Wei",
"Wang",
Ethnicity::EastAsian,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Li",
"Zhang",
Ethnicity::EastAsian,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"Ming",
"Chen",
Ethnicity::EastAsian,
Script::Latin,
Some(Gender::Neutral),
NameFrequency::Common,
),
NameExample::new(
"伟",
"王",
Ethnicity::EastAsian,
Script::Chinese,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"丽",
"张",
Ethnicity::EastAsian,
Script::Chinese,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"Takeshi",
"Tanaka",
Ethnicity::EastAsian,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Yuki",
"Yamamoto",
Ethnicity::EastAsian,
Script::Latin,
Some(Gender::Neutral),
NameFrequency::Common,
),
NameExample::new(
"太郎",
"田中",
Ethnicity::EastAsian,
Script::Japanese,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"花子",
"山本",
Ethnicity::EastAsian,
Script::Japanese,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"Min-jun",
"Kim",
Ethnicity::EastAsian,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Seo-yeon",
"Park",
Ethnicity::EastAsian,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"민준",
"김",
Ethnicity::EastAsian,
Script::Korean,
Some(Gender::Masculine),
NameFrequency::Common,
),
]
}
fn south_asian_names() -> Vec<NameExample> {
vec![
NameExample::new(
"Raj",
"Patel",
Ethnicity::SouthAsian,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Priya",
"Sharma",
Ethnicity::SouthAsian,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"Arjun",
"Singh",
Ethnicity::SouthAsian,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Aisha",
"Khan",
Ethnicity::SouthAsian,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"Vikram",
"Kumar",
Ethnicity::SouthAsian,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Sunita",
"Gupta",
Ethnicity::SouthAsian,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"राज",
"पटेल",
Ethnicity::SouthAsian,
Script::Devanagari,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"प्रिया",
"शर्मा",
Ethnicity::SouthAsian,
Script::Devanagari,
Some(Gender::Feminine),
NameFrequency::Common,
),
]
}
fn middle_eastern_names() -> Vec<NameExample> {
vec![
NameExample::new(
"Ahmed",
"Hassan",
Ethnicity::MiddleEastern,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Fatima",
"Ali",
Ethnicity::MiddleEastern,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"Mohammed",
"Ibrahim",
Ethnicity::MiddleEastern,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Layla",
"Omar",
Ethnicity::MiddleEastern,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"Yusuf",
"Mustafa",
Ethnicity::MiddleEastern,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Mariam",
"Khalil",
Ethnicity::MiddleEastern,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"أحمد",
"حسن",
Ethnicity::MiddleEastern,
Script::Arabic,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"فاطمة",
"علي",
Ethnicity::MiddleEastern,
Script::Arabic,
Some(Gender::Feminine),
NameFrequency::Common,
),
]
}
fn african_names() -> Vec<NameExample> {
let mut names = Vec::new();
names.extend(vec![
NameExample::new(
"Chidi",
"Okonkwo",
Ethnicity::African,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Amara",
"Adebayo",
Ethnicity::African,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"Kwame",
"Mensah",
Ethnicity::African,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Nneka",
"Nwosu",
Ethnicity::African,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"Oluwaseun",
"Afolabi",
Ethnicity::African,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Moderate,
),
NameExample::new(
"Chidinma",
"Eze",
Ethnicity::African,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Moderate,
),
NameExample::new(
"Tendai",
"Moyo",
Ethnicity::African,
Script::Latin,
Some(Gender::Neutral),
NameFrequency::Moderate,
),
NameExample::new(
"Zainab",
"Diallo",
Ethnicity::African,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Moderate,
),
]);
names.extend(vec![
NameExample::new(
"Ivan",
"Petrov",
Ethnicity::European,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Olga",
"Ivanova",
Ethnicity::European,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"Иван",
"Петров",
Ethnicity::European,
Script::Cyrillic,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Ольга",
"Иванова",
Ethnicity::European,
Script::Cyrillic,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"Dmytro",
"Shevchenko",
Ethnicity::European,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Moderate,
),
NameExample::new(
"Katarzyna",
"Kowalski",
Ethnicity::European,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Moderate,
),
NameExample::new(
"Alexander",
"Volkov",
Ethnicity::European,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Sofia",
"Kozlova",
Ethnicity::European,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"Dmitri",
"Sokolov",
Ethnicity::European,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Anastasia",
"Popova",
Ethnicity::European,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
]);
names.extend(vec![
NameExample::new(
"Robert",
"Jones",
Ethnicity::European,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Patricia",
"Garcia",
Ethnicity::European,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"Michael",
"Miller",
Ethnicity::European,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Jennifer",
"Davis",
Ethnicity::European,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"David",
"Rodriguez",
Ethnicity::European,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Linda",
"Martinez",
Ethnicity::European,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"Richard",
"Hernandez",
Ethnicity::European,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Barbara",
"Lopez",
Ethnicity::European,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"Joseph",
"Wilson",
Ethnicity::European,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Elizabeth",
"Anderson",
Ethnicity::European,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"Thomas",
"Thomas",
Ethnicity::European,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Jessica",
"Taylor",
Ethnicity::European,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"Charles",
"Moore",
Ethnicity::European,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Sarah",
"Jackson",
Ethnicity::European,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"Christopher",
"Martin",
Ethnicity::European,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Karen",
"Lee",
Ethnicity::European,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"Daniel",
"Thompson",
Ethnicity::European,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Nancy",
"White",
Ethnicity::European,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"Matthew",
"Harris",
Ethnicity::European,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Betty",
"Sanchez",
Ethnicity::European,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
]);
names.extend(vec![
NameExample::new(
"Malik",
"Anderson",
Ethnicity::AfricanAmerican,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Keisha",
"Thomas",
Ethnicity::AfricanAmerican,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"Andre",
"Harris",
Ethnicity::AfricanAmerican,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Tiffany",
"Clark",
Ethnicity::AfricanAmerican,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"Marcus",
"Lewis",
Ethnicity::AfricanAmerican,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Nicole",
"Walker",
Ethnicity::AfricanAmerican,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"Darius",
"Hall",
Ethnicity::AfricanAmerican,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Monique",
"Allen",
Ethnicity::AfricanAmerican,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"Terrell",
"Young",
Ethnicity::AfricanAmerican,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Danielle",
"King",
Ethnicity::AfricanAmerican,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"Kendrick",
"Wright",
Ethnicity::AfricanAmerican,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Brittany",
"Lopez",
Ethnicity::AfricanAmerican,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"Jermaine",
"Hill",
Ethnicity::AfricanAmerican,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Crystal",
"Scott",
Ethnicity::AfricanAmerican,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"Antoine",
"Green",
Ethnicity::AfricanAmerican,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Ebony",
"Adams",
Ethnicity::AfricanAmerican,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"Reginald",
"Baker",
Ethnicity::AfricanAmerican,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Jasmine",
"Nelson",
Ethnicity::AfricanAmerican,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"Darnell",
"Carter",
Ethnicity::AfricanAmerican,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"LaTasha",
"Mitchell",
Ethnicity::AfricanAmerican,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
]);
names.extend(vec![
NameExample::new(
"Alejandro",
"Fernandez",
Ethnicity::Hispanic,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Valentina",
"Ramirez",
Ethnicity::Hispanic,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"Sebastian",
"Torres",
Ethnicity::Hispanic,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Camila",
"Flores",
Ethnicity::Hispanic,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"Mateo",
"Rivera",
Ethnicity::Hispanic,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Lucia",
"Gomez",
Ethnicity::Hispanic,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"Nicolas",
"Diaz",
Ethnicity::Hispanic,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Elena",
"Reyes",
Ethnicity::Hispanic,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"Gabriel",
"Morales",
Ethnicity::Hispanic,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Sofia",
"Ortiz",
Ethnicity::Hispanic,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"Adrian",
"Gutierrez",
Ethnicity::Hispanic,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Isabella",
"Chavez",
Ethnicity::Hispanic,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"Luis",
"Jimenez",
Ethnicity::Hispanic,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Gabriela",
"Moreno",
Ethnicity::Hispanic,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"Fernando",
"Alvarez",
Ethnicity::Hispanic,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Valeria",
"Ruiz",
Ethnicity::Hispanic,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"Ricardo",
"Vargas",
Ethnicity::Hispanic,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Andrea",
"Mendoza",
Ethnicity::Hispanic,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"Eduardo",
"Castillo",
Ethnicity::Hispanic,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Natalia",
"Ramos",
Ethnicity::Hispanic,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
]);
names.extend(vec![
NameExample::new(
"Hiroshi",
"Suzuki",
Ethnicity::EastAsian,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Yuki",
"Takahashi",
Ethnicity::EastAsian,
Script::Latin,
Some(Gender::Neutral),
NameFrequency::Common,
),
NameExample::new(
"Kenji",
"Tanaka",
Ethnicity::EastAsian,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Sakura",
"Watanabe",
Ethnicity::EastAsian,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"Jun",
"Ito",
Ethnicity::EastAsian,
Script::Latin,
Some(Gender::Neutral),
NameFrequency::Common,
),
NameExample::new(
"Mei",
"Nakamura",
Ethnicity::EastAsian,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"Xiaoming",
"Li",
Ethnicity::EastAsian,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Xiaoli",
"Wang",
Ethnicity::EastAsian,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"Jian",
"Liu",
Ethnicity::EastAsian,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Yan",
"Zhang",
Ethnicity::EastAsian,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"Hye-jin",
"Park",
Ethnicity::EastAsian,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"Seung-ho",
"Kim",
Ethnicity::EastAsian,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Ji-woo",
"Lee",
Ethnicity::EastAsian,
Script::Latin,
Some(Gender::Neutral),
NameFrequency::Common,
),
NameExample::new(
"Soo-jin",
"Choi",
Ethnicity::EastAsian,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"Min-ho",
"Jung",
Ethnicity::EastAsian,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"明",
"王",
Ethnicity::EastAsian,
Script::Chinese,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"美",
"李",
Ethnicity::EastAsian,
Script::Chinese,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"健",
"张",
Ethnicity::EastAsian,
Script::Chinese,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"花子",
"佐藤",
Ethnicity::EastAsian,
Script::Japanese,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"太郎",
"鈴木",
Ethnicity::EastAsian,
Script::Japanese,
Some(Gender::Masculine),
NameFrequency::Common,
),
]);
names.extend(vec![
NameExample::new(
"Amit",
"Patel",
Ethnicity::SouthAsian,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Kavita",
"Sharma",
Ethnicity::SouthAsian,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"Rahul",
"Singh",
Ethnicity::SouthAsian,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Deepika",
"Kumar",
Ethnicity::SouthAsian,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"Vikram",
"Gupta",
Ethnicity::SouthAsian,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Anjali",
"Mehta",
Ethnicity::SouthAsian,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"Rohan",
"Desai",
Ethnicity::SouthAsian,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Meera",
"Joshi",
Ethnicity::SouthAsian,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"Siddharth",
"Reddy",
Ethnicity::SouthAsian,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Kiran",
"Nair",
Ethnicity::SouthAsian,
Script::Latin,
Some(Gender::Neutral),
NameFrequency::Common,
),
NameExample::new(
"Arjun",
"Iyer",
Ethnicity::SouthAsian,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Divya",
"Menon",
Ethnicity::SouthAsian,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"Nikhil",
"Rao",
Ethnicity::SouthAsian,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Shreya",
"Malhotra",
Ethnicity::SouthAsian,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"Aditya",
"Kapoor",
Ethnicity::SouthAsian,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Pooja",
"Agarwal",
Ethnicity::SouthAsian,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"Ravi",
"Bhatt",
Ethnicity::SouthAsian,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Neha",
"Chopra",
Ethnicity::SouthAsian,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"Karan",
"Verma",
Ethnicity::SouthAsian,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Sanjana",
"Saxena",
Ethnicity::SouthAsian,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
]);
names.extend(vec![
NameExample::new(
"Omar",
"Hassan",
Ethnicity::MiddleEastern,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Zara",
"Ali",
Ethnicity::MiddleEastern,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"Tariq",
"Ibrahim",
Ethnicity::MiddleEastern,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Amina",
"Omar",
Ethnicity::MiddleEastern,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"Khalil",
"Mustafa",
Ethnicity::MiddleEastern,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Noor",
"Khalil",
Ethnicity::MiddleEastern,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"Rashid",
"Mahmoud",
Ethnicity::MiddleEastern,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Samira",
"Haddad",
Ethnicity::MiddleEastern,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"Bashir",
"Nasser",
Ethnicity::MiddleEastern,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Leila",
"Fadel",
Ethnicity::MiddleEastern,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"Karim",
"Said",
Ethnicity::MiddleEastern,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Yasmin",
"Malik",
Ethnicity::MiddleEastern,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"Jamal",
"Rahman",
Ethnicity::MiddleEastern,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Soraya",
"Abbas",
Ethnicity::MiddleEastern,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"Nabil",
"Hakim",
Ethnicity::MiddleEastern,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Rania",
"Farid",
Ethnicity::MiddleEastern,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"Tariq",
"Zaki",
Ethnicity::MiddleEastern,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Dina",
"Salem",
Ethnicity::MiddleEastern,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"Malik",
"Nasir",
Ethnicity::MiddleEastern,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Hala",
"Qureshi",
Ethnicity::MiddleEastern,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
]);
names.extend(vec![
NameExample::new(
"Kofi",
"Mensah",
Ethnicity::African,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Amina",
"Diallo",
Ethnicity::African,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"Kwame",
"Asante",
Ethnicity::African,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Fatou",
"Ndiaye",
Ethnicity::African,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"Bakary",
"Traore",
Ethnicity::African,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Aissatou",
"Ba",
Ethnicity::African,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"Ibrahim",
"Sow",
Ethnicity::African,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Mariama",
"Diallo",
Ethnicity::African,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"Sekou",
"Keita",
Ethnicity::African,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Awa",
"Cisse",
Ethnicity::African,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"Moussa",
"Toure",
Ethnicity::African,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Kadiatou",
"Sangare",
Ethnicity::African,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"Youssouf",
"Kone",
Ethnicity::African,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Aminata",
"Diop",
Ethnicity::African,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"Boubacar",
"Sall",
Ethnicity::African,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Hawa",
"Ba",
Ethnicity::African,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"Mamadou",
"Diallo",
Ethnicity::African,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Ramatoulaye",
"Ndiaye",
Ethnicity::African,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
NameExample::new(
"Amadou",
"Sow",
Ethnicity::African,
Script::Latin,
Some(Gender::Masculine),
NameFrequency::Common,
),
NameExample::new(
"Aissata",
"Traore",
Ethnicity::African,
Script::Latin,
Some(Gender::Feminine),
NameFrequency::Common,
),
]);
names
}
/// Compares an observed per-group distribution with a fixed reference table.
///
/// `observed` maps a group label to a weight. The weights are rescaled so they
/// sum to 1 before the comparison; if their total is not strictly positive the
/// values are forwarded unchanged. The comparison itself is delegated to
/// `DistributionValidation::validate`, whose result is returned as-is.
fn validate_demographic_distribution(
    observed: &HashMap<String, f64>,
) -> crate::eval::bias_config::DistributionValidation {
    // Reference share per group label.
    // NOTE(review): these shares add up to 1.03 rather than 1.0 — confirm
    // whether the table is meant to be renormalized.
    let reference: HashMap<String, _> = [
        ("European", 0.60),
        ("Hispanic", 0.19),
        ("AfricanAmerican", 0.13),
        ("EastAsian", 0.06),
        ("SouthAsian", 0.02),
        ("MiddleEastern", 0.01),
        ("African", 0.01),
        ("Indigenous", 0.01),
    ]
    .iter()
    .map(|&(label, share)| (label.to_string(), share))
    .collect();

    // Start from a copy and rescale in place; a non-positive (or NaN) total
    // leaves the copy untouched, which also avoids dividing by zero.
    let weight_sum: f64 = observed.values().sum();
    let mut proportions = observed.clone();
    if weight_sum > 0.0 {
        for weight in proportions.values_mut() {
            *weight /= weight_sum;
        }
    }

    // The third argument is presumably the tolerated deviation — confirm
    // against `DistributionValidation::validate`.
    crate::eval::bias_config::DistributionValidation::validate(&proportions, &reference, 0.10)
}
/// Builds a sentence that mentions `location`, choosing one of ten fixed
/// templates from a hash of the location string.
///
/// The same `location` always maps to the same template within a given build.
/// The hash comes from `DefaultHasher`, whose algorithm the standard library
/// does not promise to keep stable across Rust releases, so the chosen template
/// may change after a toolchain upgrade.
///
/// Only the selected sentence is allocated.
fn create_realistic_location_sentence(location: &str) -> String {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::{Hash, Hasher};

    // Each template is stored as the text before and after the location.
    const TEMPLATES: [(&str, &str); 10] = [
        ("The summit was held in ", " last month."),
        ("", " has become a major tech hub in recent years."),
        ("Tourists flock to ", " during the summer months."),
        ("The conference in ", " attracted thousands of attendees."),
        ("", " is known for its vibrant cultural scene."),
        ("Business leaders met in ", " to discuss trade policies."),
        ("", " hosted the international competition this year."),
        ("The economic growth in ", " has been remarkable."),
        ("", " is home to several world-renowned universities."),
        ("The climate summit in ", " addressed global challenges."),
    ];

    let mut hasher = DefaultHasher::new();
    location.hash(&mut hasher);

    // Reduce in u64 *before* narrowing: casting the full hash to `usize` first
    // would drop the high bits on 32-bit targets and could select a different
    // template there. After the modulo the value is < TEMPLATES.len(), so the
    // cast is lossless and the index is always in bounds.
    let slot = (hasher.finish() % TEMPLATES.len() as u64) as usize;
    let (before, after) = TEMPLATES[slot];
    [before, location, after].concat()
}
pub fn create_diverse_location_dataset() -> Vec<LocationExample> {
vec![
LocationExample::new(
"New York",
Region::NorthAmerica,
Script::Latin,
LocationType::City,
),
LocationExample::new(
"Los Angeles",
Region::NorthAmerica,
Script::Latin,
LocationType::City,
),
LocationExample::new(
"Toronto",
Region::NorthAmerica,
Script::Latin,
LocationType::City,
),
LocationExample::new(
"Mexico City",
Region::NorthAmerica,
Script::Latin,
LocationType::City,
),
LocationExample::new(
"London",
Region::WesternEurope,
Script::Latin,
LocationType::City,
),
LocationExample::new(
"Paris",
Region::WesternEurope,
Script::Latin,
LocationType::City,
),
LocationExample::new(
"Berlin",
Region::WesternEurope,
Script::Latin,
LocationType::City,
),
LocationExample::new(
"Amsterdam",
Region::WesternEurope,
Script::Latin,
LocationType::City,
),
LocationExample::new(
"Moscow",
Region::EasternEurope,
Script::Latin,
LocationType::City,
),
LocationExample::new(
"Москва",
Region::EasternEurope,
Script::Cyrillic,
LocationType::City,
),
LocationExample::new(
"Warsaw",
Region::EasternEurope,
Script::Latin,
LocationType::City,
),
LocationExample::new(
"Kyiv",
Region::EasternEurope,
Script::Latin,
LocationType::City,
),
LocationExample::new("Tokyo", Region::EastAsia, Script::Latin, LocationType::City),
LocationExample::new(
"東京",
Region::EastAsia,
Script::Japanese,
LocationType::City,
),
LocationExample::new(
"Beijing",
Region::EastAsia,
Script::Latin,
LocationType::City,
),
LocationExample::new(
"北京",
Region::EastAsia,
Script::Chinese,
LocationType::City,
),
LocationExample::new("Seoul", Region::EastAsia, Script::Latin, LocationType::City),
LocationExample::new("서울", Region::EastAsia, Script::Korean, LocationType::City),
LocationExample::new(
"Mumbai",
Region::SouthAsia,
Script::Latin,
LocationType::City,
),
LocationExample::new(
"Delhi",
Region::SouthAsia,
Script::Latin,
LocationType::City,
),
LocationExample::new(
"Dhaka",
Region::SouthAsia,
Script::Latin,
LocationType::City,
),
LocationExample::new(
"Karachi",
Region::SouthAsia,
Script::Latin,
LocationType::City,
),
LocationExample::new(
"Bangkok",
Region::SoutheastAsia,
Script::Latin,
LocationType::City,
),
LocationExample::new(
"Singapore",
Region::SoutheastAsia,
Script::Latin,
LocationType::City,
),
LocationExample::new(
"Jakarta",
Region::SoutheastAsia,
Script::Latin,
LocationType::City,
),
LocationExample::new(
"Ho Chi Minh City",
Region::SoutheastAsia,
Script::Latin,
LocationType::City,
),
LocationExample::new(
"Dubai",
Region::MiddleEast,
Script::Latin,
LocationType::City,
),
LocationExample::new(
"دبي",
Region::MiddleEast,
Script::Arabic,
LocationType::City,
),
LocationExample::new(
"Tehran",
Region::MiddleEast,
Script::Latin,
LocationType::City,
),
LocationExample::new(
"Riyadh",
Region::MiddleEast,
Script::Latin,
LocationType::City,
),
LocationExample::new("Lagos", Region::Africa, Script::Latin, LocationType::City),
LocationExample::new("Nairobi", Region::Africa, Script::Latin, LocationType::City),
LocationExample::new("Cairo", Region::Africa, Script::Latin, LocationType::City),
LocationExample::new(
"Johannesburg",
Region::Africa,
Script::Latin,
LocationType::City,
),
LocationExample::new(
"Addis Ababa",
Region::Africa,
Script::Latin,
LocationType::City,
),
LocationExample::new(
"São Paulo",
Region::LatinAmerica,
Script::Latin,
LocationType::City,
),
LocationExample::new(
"Buenos Aires",
Region::LatinAmerica,
Script::Latin,
LocationType::City,
),
LocationExample::new(
"Bogotá",
Region::LatinAmerica,
Script::Latin,
LocationType::City,
),
LocationExample::new(
"Lima",
Region::LatinAmerica,
Script::Latin,
LocationType::City,
),
LocationExample::new("Sydney", Region::Oceania, Script::Latin, LocationType::City),
LocationExample::new(
"Melbourne",
Region::Oceania,
Script::Latin,
LocationType::City,
),
LocationExample::new(
"Auckland",
Region::Oceania,
Script::Latin,
LocationType::City,
),
]
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashSet;

    /// The name dataset must include at least one example per listed ethnicity.
    #[test]
    fn test_create_diverse_names() {
        let present: HashSet<Ethnicity> = create_diverse_name_dataset()
            .iter()
            .map(|example| example.ethnicity)
            .collect();

        let required = [
            (Ethnicity::European, "Should have European names"),
            (
                Ethnicity::AfricanAmerican,
                "Should have African-American names",
            ),
            (Ethnicity::Hispanic, "Should have Hispanic names"),
            (Ethnicity::EastAsian, "Should have East Asian names"),
            (Ethnicity::SouthAsian, "Should have South Asian names"),
            (Ethnicity::MiddleEastern, "Should have Middle Eastern names"),
            (Ethnicity::African, "Should have African names"),
        ];
        for (ethnicity, message) in required.iter() {
            assert!(present.contains(ethnicity), "{}", message);
        }
    }

    /// The name dataset must include names written in each listed script.
    #[test]
    fn test_multiple_scripts() {
        let present: HashSet<Script> = create_diverse_name_dataset()
            .iter()
            .map(|example| example.script)
            .collect();

        let required = [
            (Script::Latin, "Should have Latin script"),
            (Script::Chinese, "Should have Chinese script"),
            (Script::Japanese, "Should have Japanese script"),
            (Script::Arabic, "Should have Arabic script"),
            (Script::Cyrillic, "Should have Cyrillic script"),
        ];
        for (script, message) in required.iter() {
            assert!(present.contains(script), "{}", message);
        }
    }

    /// Masculine and feminine examples must be within 30% of each other.
    #[test]
    fn test_gender_balance() {
        let names = create_diverse_name_dataset();
        let tally = |wanted: Option<Gender>| {
            names
                .iter()
                .filter(|example| example.gender == wanted)
                .count()
        };
        let masculine = tally(Some(Gender::Masculine));
        let feminine = tally(Some(Gender::Feminine));

        // `max(1)` keeps the denominator non-zero when there are no feminine examples.
        let ratio = masculine as f64 / feminine.max(1) as f64;
        assert!(
            (0.7..=1.3).contains(&ratio),
            "Gender ratio should be roughly balanced, got {:.2}",
            ratio
        );
    }

    /// The location dataset must span at least eight regions, including the
    /// three checked explicitly below.
    #[test]
    fn test_diverse_locations() {
        let covered: HashSet<Region> = create_diverse_location_dataset()
            .iter()
            .map(|place| place.region)
            .collect();

        assert!(covered.len() >= 8, "Should cover at least 8 regions");

        let required = [
            (Region::Africa, "Should have African locations"),
            (Region::LatinAmerica, "Should have Latin American locations"),
            (Region::MiddleEast, "Should have Middle Eastern locations"),
        ];
        for (region, message) in required.iter() {
            assert!(covered.contains(region), "{}", message);
        }
    }

    /// For rates 0.9 / 0.7 / 0.8, `compute_max_gap` must return 0.2.
    #[test]
    fn test_parity_gap_computation() {
        let rates: HashMap<String, _> = [("A", 0.9), ("B", 0.7), ("C", 0.8)]
            .iter()
            .map(|&(group, rate)| (group.to_string(), rate))
            .collect();

        let gap = compute_max_gap(&rates);
        assert!((gap - 0.2).abs() < 0.001, "Gap should be 0.2, got {}", gap);
    }
}