use rand::RngExt;
use rand_distr::Normal;
/// Picks one element of `values` at random and returns a copy of it.
///
/// Returns `None` for an empty slice; in that case `rng` is not touched.
fn random_copy<T: Copy, R: Rng>(rng: &mut R, values: &[T]) -> Option<T> {
    match values.len() {
        // Keep the empty `0..0` range away from `random_range`.
        0 => None,
        count => {
            let picked = rng.random_range(0..count);
            values.get(picked).copied()
        }
    }
}
/// Fake-data generator that draws its random values from a borrowed RNG.
///
/// The type itself places no trait bound on `R`; the `Rng` requirement lives
/// on the `impl` blocks that actually sample from it.
#[derive(Debug)]
pub struct Faker<'a, R> {
    /// Random number generator borrowed for the lifetime of the `Faker`.
    rng: &'a mut R,
}
impl<'a, R: Rng> Faker<'a, R> {
pub fn new(rng: &'a mut R) -> Self {
Self { rng }
}
/// Builds a random person name formatted as `LAST^FIRST`.
///
/// `gender` chooses the first-name pool: `Some("M")` and `Some("F")` each
/// select their own ten-name list, and any other value (including `None`)
/// selects a sixteen-name mixed list. The comparison is exact, so for
/// example `Some("m")` falls through to the mixed list.
///
/// The first name is drawn from the RNG before the last name.
pub fn name(&mut self, gender: Option<&str>) -> String {
    const FIRST_NAMES_M: &[&str] = &[
        "James", "John", "Robert", "Michael", "William", "David", "Richard", "Joseph",
        "Thomas", "Charles",
    ];
    const FIRST_NAMES_F: &[&str] = &[
        "Mary",
        "Patricia",
        "Jennifer",
        "Linda",
        "Elizabeth",
        "Barbara",
        "Susan",
        "Jessica",
        "Sarah",
        "Karen",
    ];
    const FIRST_NAMES_ANY: &[&str] = &[
        "James",
        "Mary",
        "John",
        "Patricia",
        "Robert",
        "Jennifer",
        "Michael",
        "Linda",
        "William",
        "Elizabeth",
        "David",
        "Barbara",
        "Richard",
        "Susan",
        "Joseph",
        "Jessica",
    ];
    const LAST_NAMES: &[&str] = &[
        "Smith",
        "Johnson",
        "Williams",
        "Brown",
        "Jones",
        "Garcia",
        "Miller",
        "Davis",
        "Rodriguez",
        "Martinez",
        "Hernandez",
        "Lopez",
        "Gonzalez",
        "Wilson",
        "Anderson",
    ];

    let first_name_pool = match gender {
        Some("M") => FIRST_NAMES_M,
        Some("F") => FIRST_NAMES_F,
        _ => FIRST_NAMES_ANY,
    };

    // The pools are never empty, so the "" fallbacks are only a formality.
    let given = random_copy(self.rng, first_name_pool).unwrap_or("");
    let family = random_copy(self.rng, LAST_NAMES).unwrap_or("");
    format!("{family}^{given}")
}
/// Builds a random address formatted as
/// `<number> <street>^^<city>^<state>^<zip>^USA`.
///
/// The house number is drawn from `100..9999` and the ZIP code from
/// `10000..99999`; both upper bounds are exclusive. Values are drawn in the
/// order: house number, street, city, state, ZIP.
pub fn address(&mut self) -> String {
    const STREETS: &[&str] = &[
        "Main St",
        "Oak Ave",
        "Pine Rd",
        "Elm St",
        "Maple Dr",
        "Cedar Ln",
        "Birch Way",
        "Washington St",
        "Lake St",
        "Hill St",
    ];
    const CITIES: &[&str] = &[
        "Anytown",
        "Springfield",
        "Riverside",
        "Fairview",
        "Centerville",
        "Georgetown",
        "Mount Pleasant",
        "Oakland",
        "Middletown",
        "Franklin",
    ];
    const STATES: &[&str] = &["AL", "AK", "AZ", "AR", "CA", "CO", "CT", "DE", "FL", "GA"];

    let house_number = self.rng.random_range(100..9999);
    let street_name = random_copy(self.rng, STREETS).unwrap_or("");
    let city_name = random_copy(self.rng, CITIES).unwrap_or("");
    let state_code = random_copy(self.rng, STATES).unwrap_or("");
    let zip_code = self.rng.random_range(10000..99999);

    format!("{house_number} {street_name}^^{city_name}^{state_code}^{zip_code:05}^USA")
}
/// Builds a random phone number formatted as `(AAA)EEE-NNNN`.
///
/// The area code and exchange are each drawn from `200..999` and the line
/// number from `1000..9999` (upper bounds exclusive), in that order.
pub fn phone(&mut self) -> String {
    let area = self.rng.random_range(200..999);
    let prefix = self.rng.random_range(200..999);
    let line = self.rng.random_range(1000..9999);
    format!("({}){}-{}", area, prefix, line)
}
/// Builds a random SSN-shaped string formatted as `AAA-GG-SSSS`.
///
/// The three groups are drawn, in order, from `100..999`, `10..99` and
/// `1000..9999` (upper bounds exclusive), so no group ever needs padding.
pub fn ssn(&mut self) -> String {
    let area = self.rng.random_range(100..999);
    let group = self.rng.random_range(10..99);
    let serial = self.rng.random_range(1000..9999);
    format!("{}-{}-{}", area, group, serial)
}
/// Builds a random all-digit identifier that is 6 to 10 characters long.
///
/// The length is drawn first, then one digit (`0`-`9`) per position.
/// Leading zeros are possible.
pub fn mrn(&mut self) -> String {
    let digit_count: i32 = self.rng.random_range(6..=10);
    (0..digit_count)
        .map(|_| {
            let digit: i32 = self.rng.random_range(0..10);
            digit.to_string()
        })
        .collect()
}
/// Builds a random code of the form `<category>.<digit>`, e.g. `E04.7`.
///
/// The category comes from a fixed list of ten three-character prefixes and
/// is drawn before the single-digit subcode (`0`-`9`).
pub fn icd10(&mut self) -> String {
    const CATEGORIES: &[&str] = &[
        "A00", "B01", "C02", "D03", "E04", "F05", "G06", "H07", "I08", "J09",
    ];
    let prefix = random_copy(self.rng, CATEGORIES).unwrap_or("");
    let suffix = self.rng.random_range(0..10);
    format!("{}.{}", prefix, suffix)
}
/// Returns a random number from `10000..9999999` (upper bound exclusive)
/// rendered as a plain decimal string of five to seven digits.
pub fn loinc(&mut self) -> String {
    let numeric_code: i32 = self.rng.random_range(10_000..9_999_999);
    numeric_code.to_string()
}
/// Returns one medication name chosen at random from a fixed list of ten.
pub fn medication(&mut self) -> String {
    const MEDICATIONS: &[&str] = &[
        "Atorvastatin",
        "Levothyroxine",
        "Lisinopril",
        "Metformin",
        "Amlodipine",
        "Metoprolol",
        "Omeprazole",
        "Simvastatin",
        "Losartan",
        "Albuterol",
    ];
    random_copy(self.rng, MEDICATIONS)
        .map(str::to_owned)
        .unwrap_or_default()
}
/// Returns one allergen name chosen at random from a fixed list of ten.
pub fn allergen(&mut self) -> String {
    const ALLERGENS: &[&str] = &[
        "Penicillin",
        "Latex",
        "Peanuts",
        "Shellfish",
        "Eggs",
        "Milk",
        "Tree Nuts",
        "Soy",
        "Wheat",
        "Bee Stings",
    ];
    random_copy(self.rng, ALLERGENS)
        .map(str::to_owned)
        .unwrap_or_default()
}
/// Returns one of the eight ABO/Rh labels (`A+`, `A-`, ..., `O-`) at random.
pub fn blood_type(&mut self) -> String {
    const BLOOD_TYPES: &[&str] = &["A+", "A-", "B+", "B-", "AB+", "AB-", "O+", "O-"];
    random_copy(self.rng, BLOOD_TYPES)
        .map(str::to_owned)
        .unwrap_or_default()
}
/// Returns one of three fixed ethnicity labels chosen at random.
pub fn ethnicity(&mut self) -> String {
    const ETHNICITIES: &[&str] = &[
        "Hispanic or Latino",
        "Not Hispanic or Latino",
        "Declined to Specify",
    ];
    random_copy(self.rng, ETHNICITIES)
        .map(str::to_owned)
        .unwrap_or_default()
}
/// Returns one of six fixed race labels chosen at random.
pub fn race(&mut self) -> String {
    const RACES: &[&str] = &[
        "American Indian or Alaska Native",
        "Asian",
        "Black or African American",
        "Native Hawaiian or Other Pacific Islander",
        "White",
        "Declined to Specify",
    ];
    random_copy(self.rng, RACES)
        .map(str::to_owned)
        .unwrap_or_default()
}
/// Builds a string of exactly `digits` random decimal digits.
///
/// Each position is drawn independently from `0`-`9`, so leading zeros are
/// possible. `digits == 0` yields an empty string without touching the RNG.
pub fn numeric(&mut self, digits: usize) -> String {
    (0..digits)
        .map(|_| {
            let digit: i32 = self.rng.random_range(0..10);
            digit.to_string()
        })
        .collect()
}
/// Picks a random calendar date between `start` and `end`, both inclusive.
///
/// Both bounds and the result use the compact `YYYYMMDD` form.
///
/// # Errors
///
/// * [`DateError::InvalidDateFormat`] if `start` (checked first) or `end`
///   does not parse as `YYYYMMDD`.
/// * [`DateError::InvalidDateRange`] if `end` is earlier than `start`.
/// * [`DateError::DateOutOfRange`] if adding the drawn offset to `start`
///   fails.
pub fn date(&mut self, start: &str, end: &str) -> Result<String, DateError> {
    const COMPACT: &str = "%Y%m%d";

    let parse = |raw: &str| -> Result<chrono::NaiveDate, DateError> {
        chrono::NaiveDate::parse_from_str(raw, COMPACT)
            .map_err(|_| DateError::InvalidDateFormat(raw.to_string()))
    };

    let first = parse(start)?;
    let last = parse(end)?;
    if first > last {
        return Err(DateError::InvalidDateRange {
            start: start.to_string(),
            end: end.to_string(),
        });
    }

    // Number of whole days separating the bounds; 0 when they are equal.
    let span_days = last.signed_duration_since(first).num_days();
    let offset_days = self.rng.random_range(0..=span_days);

    first
        .checked_add_signed(chrono::Duration::days(offset_days))
        .map(|picked| picked.format(COMPACT).to_string())
        .ok_or(DateError::DateOutOfRange)
}
/// Draws one sample from a normal distribution and formats it with
/// `precision` digits after the decimal point.
///
/// # Errors
///
/// Returns [`GaussianError::InvalidParameters`] when `Normal::new(mean, sd)`
/// rejects the parameters; the RNG is not advanced in that case.
pub fn gaussian(
    &mut self,
    mean: f64,
    sd: f64,
    precision: usize,
) -> Result<String, GaussianError> {
    let Ok(distribution) = Normal::new(mean, sd) else {
        return Err(GaussianError::InvalidParameters);
    };
    let sample: f64 = self.rng.sample(distribution);
    Ok(format!("{:.*}", precision, sample))
}
/// Returns a freshly generated version-4 UUID in its default string form.
///
/// NOTE: this calls `uuid::Uuid::new_v4()` directly and never touches
/// `self.rng`, so the value is independent of the `Faker`'s RNG state.
pub fn uuid_v4(&self) -> String {
    let id = uuid::Uuid::new_v4();
    id.to_string()
}
/// Returns the current UTC time formatted as `YYYYMMDDHHMMSS`.
///
/// The value comes from `chrono::Utc::now()`; `self.rng` is not used.
pub fn dtm_now_utc(&self) -> String {
    chrono::Utc::now().format("%Y%m%d%H%M%S").to_string()
}
/// Returns a clone of one randomly chosen entry of `options`.
///
/// Returns `None` for an empty slice; the RNG is not advanced in that case.
pub fn select_from(&mut self, options: &[String]) -> Option<String> {
    match options.len() {
        0 => None,
        count => {
            let picked = self.rng.random_range(0..count);
            options.get(picked).cloned()
        }
    }
}
/// Picks one key of `map` at random and returns a clone of its value.
///
/// Returns `None` for an empty map; the RNG is not advanced in that case.
///
/// The keys are sorted before one is picked. `HashMap` iteration order is
/// arbitrary, so indexing into the raw key iterator would make the result
/// depend on the map's internal layout instead of only on the RNG state and
/// the map's contents.
pub fn select_from_map(
    &mut self,
    map: &std::collections::HashMap<String, String>,
) -> Option<String> {
    if map.is_empty() {
        return None;
    }
    let mut keys: Vec<&String> = map.keys().collect();
    // Keys are unique, so an unstable sort still yields a single fixed order.
    keys.sort_unstable();
    let index = self.rng.random_range(0..keys.len());
    keys.get(index)
        .and_then(|random_key| map.get(*random_key))
        .cloned()
}
}
/// Errors reported by `Faker::date`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DateError {
    /// The contained input string could not be parsed as `YYYYMMDD`.
    InvalidDateFormat(String),
    /// The `end` bound is earlier than the `start` bound.
    InvalidDateRange {
        /// The start bound exactly as supplied by the caller.
        start: String,
        /// The end bound exactly as supplied by the caller.
        end: String,
    },
    /// Adding the drawn day offset to the start date failed.
    DateOutOfRange,
}

impl std::fmt::Display for DateError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::InvalidDateFormat(input) => {
                write!(f, "Invalid date format: {input} (expected YYYYMMDD)")
            }
            Self::InvalidDateRange { start, end } => {
                write!(f, "Invalid date range: {start} is after {end}")
            }
            Self::DateOutOfRange => f.write_str("Generated date is out of range"),
        }
    }
}

impl std::error::Error for DateError {}
/// Errors reported by `Faker::gaussian`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum GaussianError {
    /// The mean / standard deviation pair was rejected when constructing the
    /// normal distribution.
    InvalidParameters,
}

impl std::fmt::Display for GaussianError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::InvalidParameters => f.write_str("Invalid Gaussian parameters"),
        }
    }
}

impl std::error::Error for GaussianError {}
/// Describes how a single value should be produced.
///
/// Each variant is turned into a `String` by `FakerValue::generate`, which
/// dispatches to the `Faker` method of the matching kind.
#[derive(Debug, Clone, PartialEq)]
pub enum FakerValue {
    /// Always yields the contained string unchanged.
    Fixed(String),
    /// Yields one randomly chosen entry of the list.
    From(Vec<String>),
    /// Yields a string of `digits` random decimal digits.
    Numeric { digits: usize },
    /// Yields a random date between `start` and `end` (both `YYYYMMDD`).
    Date { start: String, end: String },
    /// Yields a normally distributed sample printed with `precision` decimals.
    Gaussian {
        mean: f64,
        sd: f64,
        precision: usize,
    },
    /// Yields the value stored under one randomly chosen key of the map.
    Map(std::collections::HashMap<String, String>),
    /// Yields a version-4 UUID string.
    UuidV4,
    /// Yields the current UTC timestamp.
    DtmNowUtc,
    /// Yields a person name; `gender` is forwarded to `Faker::name`.
    RealisticName { gender: Option<String> },
    /// Yields the output of `Faker::address`.
    RealisticAddress,
    /// Yields the output of `Faker::phone`.
    RealisticPhone,
    /// Yields the output of `Faker::ssn`.
    RealisticSsn,
    /// Yields the output of `Faker::mrn`.
    RealisticMrn,
    /// Yields the output of `Faker::icd10`.
    RealisticIcd10,
    /// Yields the output of `Faker::loinc`.
    RealisticLoinc,
    /// Yields the output of `Faker::medication`.
    RealisticMedication,
    /// Yields the output of `Faker::allergen`.
    RealisticAllergen,
    /// Yields the output of `Faker::blood_type`.
    RealisticBloodType,
    /// Yields the output of `Faker::ethnicity`.
    RealisticEthnicity,
    /// Yields the output of `Faker::race`.
    RealisticRace,
}
impl FakerValue {
    /// Produces the string described by this value, using `faker` for any
    /// randomness the variant needs.
    ///
    /// # Errors
    ///
    /// * [`GenerateError::EmptyOptions`] for a `From` variant with no entries.
    /// * [`GenerateError::EmptyMap`] for a `Map` variant with no entries.
    /// * [`GenerateError::Date`] wrapping the error returned by `Faker::date`.
    /// * [`GenerateError::Gaussian`] wrapping the error returned by
    ///   `Faker::gaussian`.
    pub fn generate<R: Rng>(&self, faker: &mut Faker<R>) -> Result<String, GenerateError> {
        let rendered = match self {
            Self::Fixed(text) => text.clone(),
            Self::From(choices) => faker
                .select_from(choices)
                .ok_or(GenerateError::EmptyOptions)?,
            Self::Numeric { digits } => faker.numeric(*digits),
            Self::Date { start, end } => faker.date(start, end).map_err(GenerateError::Date)?,
            Self::Gaussian {
                mean,
                sd,
                precision,
            } => faker
                .gaussian(*mean, *sd, *precision)
                .map_err(GenerateError::Gaussian)?,
            Self::Map(table) => faker
                .select_from_map(table)
                .ok_or(GenerateError::EmptyMap)?,
            Self::UuidV4 => faker.uuid_v4(),
            Self::DtmNowUtc => faker.dtm_now_utc(),
            Self::RealisticName { gender } => faker.name(gender.as_deref()),
            Self::RealisticAddress => faker.address(),
            Self::RealisticPhone => faker.phone(),
            Self::RealisticSsn => faker.ssn(),
            Self::RealisticMrn => faker.mrn(),
            Self::RealisticIcd10 => faker.icd10(),
            Self::RealisticLoinc => faker.loinc(),
            Self::RealisticMedication => faker.medication(),
            Self::RealisticAllergen => faker.allergen(),
            Self::RealisticBloodType => faker.blood_type(),
            Self::RealisticEthnicity => faker.ethnicity(),
            Self::RealisticRace => faker.race(),
        };
        Ok(rendered)
    }
}
/// Errors reported by `FakerValue::generate`.
#[derive(Debug, Clone, PartialEq)]
pub enum GenerateError {
    /// A `From` value had no options to choose from.
    EmptyOptions,
    /// A `Map` value had no entries to choose from.
    EmptyMap,
    /// Date generation failed; carries the underlying error.
    Date(DateError),
    /// Gaussian generation failed; carries the underlying error.
    Gaussian(GaussianError),
}

impl std::fmt::Display for GenerateError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::EmptyOptions => f.write_str("Cannot select from empty options"),
            Self::EmptyMap => f.write_str("Cannot select from empty map"),
            Self::Date(inner) => write!(f, "Date generation error: {inner}"),
            Self::Gaussian(inner) => write!(f, "Gaussian generation error: {inner}"),
        }
    }
}

impl std::error::Error for GenerateError {}
pub use rand::Rng;
pub use rand::SeedableRng;
pub use rand::rngs::StdRng;