datasynth_core/templates/realism/
mod.rs1pub mod addresses;
16pub mod company_names;
17pub mod descriptions;
18pub mod reference_formats;
19pub mod user_ids;
20pub mod vendor_names;
21
22pub use addresses::{Address, AddressGenerator, AddressRegion, AddressStyle};
23pub use company_names::{CompanyNameGenerator, CompanyNameStyle, Industry, LegalSuffix};
24pub use descriptions::{DescriptionVariator, TypoGenerator, VariationConfig};
25pub use reference_formats::{EnhancedReferenceFormat, EnhancedReferenceGenerator, ReferenceStyle};
26pub use user_ids::{UserIdGenerator, UserIdPattern};
27pub use vendor_names::{SpendCategory, VendorNameGenerator, VendorProfile};
28
29use rand::Rng;
30use serde::{Deserialize, Serialize};
31
32#[derive(Debug, Clone, Serialize, Deserialize)]
34#[serde(default)]
35pub struct RealismConfig {
36 pub cultural_awareness: bool,
38 pub industry_vendor_names: bool,
40 pub description_variations: bool,
42 pub abbreviation_rate: f64,
44 pub typo_rate: f64,
46 pub realistic_references: bool,
48 pub primary_region: AddressRegion,
50 pub international_diversity: bool,
52 pub diversity_index: f64,
54}
55
56impl Default for RealismConfig {
57 fn default() -> Self {
58 Self {
59 cultural_awareness: true,
60 industry_vendor_names: true,
61 description_variations: true,
62 abbreviation_rate: 0.25,
63 typo_rate: 0.01,
64 realistic_references: true,
65 primary_region: AddressRegion::NorthAmerica,
66 international_diversity: true,
67 diversity_index: 0.3,
68 }
69 }
70}
71
72#[derive(Debug, Clone)]
74pub struct RealismGenerator {
75 config: RealismConfig,
76 company_gen: CompanyNameGenerator,
77 vendor_gen: VendorNameGenerator,
78 description_var: DescriptionVariator,
79 user_id_gen: UserIdGenerator,
80 reference_gen: EnhancedReferenceGenerator,
81 address_gen: AddressGenerator,
82}
83
84impl RealismGenerator {
85 pub fn new() -> Self {
87 Self::with_config(RealismConfig::default())
88 }
89
90 pub fn with_config(config: RealismConfig) -> Self {
92 let variation_config = VariationConfig {
93 abbreviation_rate: config.abbreviation_rate,
94 typo_rate: config.typo_rate,
95 case_variation_rate: 0.05,
96 ..Default::default()
97 };
98
99 Self {
100 company_gen: CompanyNameGenerator::new(),
101 vendor_gen: VendorNameGenerator::new(),
102 description_var: DescriptionVariator::with_config(variation_config),
103 user_id_gen: UserIdGenerator::new(),
104 reference_gen: EnhancedReferenceGenerator::new(),
105 address_gen: AddressGenerator::for_region(config.primary_region),
106 config,
107 }
108 }
109
110 pub fn config(&self) -> &RealismConfig {
112 &self.config
113 }
114
115 pub fn generate_company_name(&self, industry: Industry, rng: &mut impl Rng) -> String {
117 self.company_gen.generate(industry, rng)
118 }
119
120 pub fn generate_vendor_name(&self, category: SpendCategory, rng: &mut impl Rng) -> String {
122 self.vendor_gen.generate(category, rng)
123 }
124
125 pub fn vary_description(&self, description: &str, rng: &mut impl Rng) -> String {
127 if self.config.description_variations {
128 self.description_var.apply(description, rng)
129 } else {
130 description.to_string()
131 }
132 }
133
134 pub fn generate_user_id(
136 &self,
137 first_name: &str,
138 last_name: &str,
139 index: usize,
140 rng: &mut impl Rng,
141 ) -> String {
142 self.user_id_gen.generate(first_name, last_name, index, rng)
143 }
144
145 pub fn generate_reference(
147 &self,
148 format: EnhancedReferenceFormat,
149 year: i32,
150 rng: &mut impl Rng,
151 ) -> String {
152 self.reference_gen.generate(format, year, rng)
153 }
154
155 pub fn generate_address(&self, rng: &mut impl Rng) -> Address {
157 self.address_gen.generate(rng)
158 }
159
160 pub fn company_names(&self) -> &CompanyNameGenerator {
162 &self.company_gen
163 }
164
165 pub fn vendor_names(&self) -> &VendorNameGenerator {
167 &self.vendor_gen
168 }
169
170 pub fn descriptions(&self) -> &DescriptionVariator {
172 &self.description_var
173 }
174
175 pub fn user_ids(&self) -> &UserIdGenerator {
177 &self.user_id_gen
178 }
179
180 pub fn references(&self) -> &EnhancedReferenceGenerator {
182 &self.reference_gen
183 }
184
185 pub fn addresses(&self) -> &AddressGenerator {
187 &self.address_gen
188 }
189}
190
191impl Default for RealismGenerator {
192 fn default() -> Self {
193 Self::new()
194 }
195}
196
/// Unit tests for the `RealismGenerator` facade.
///
/// Locals are named `generator` rather than `gen`, because `gen` is a reserved
/// keyword starting with the Rust 2024 edition.
#[cfg(test)]
#[allow(clippy::unwrap_used)] // Retained from the original module; tests are exempt from this lint.
mod tests {
    use super::*;
    use rand::SeedableRng;
    use rand_chacha::ChaCha8Rng;

    /// A generator built with `new()` exposes the default toggles.
    #[test]
    fn test_realism_generator_creation() {
        let generator = RealismGenerator::new();
        assert!(generator.config().cultural_awareness);
        assert!(generator.config().description_variations);
    }

    /// Custom rates passed to `with_config` are visible through `config()`.
    #[test]
    fn test_realism_generator_with_config() {
        let config = RealismConfig {
            abbreviation_rate: 0.5,
            typo_rate: 0.0,
            ..Default::default()
        };
        let generator = RealismGenerator::with_config(config);
        // Exact float comparison is intentional: the values are stored, not computed.
        assert_eq!(generator.config().abbreviation_rate, 0.5);
        assert_eq!(generator.config().typo_rate, 0.0);
    }

    /// Company-name generation yields a non-empty string.
    #[test]
    fn test_generate_company_name() {
        let mut rng = ChaCha8Rng::seed_from_u64(42);
        let generator = RealismGenerator::new();
        let name = generator.generate_company_name(Industry::Manufacturing, &mut rng);
        assert!(!name.is_empty());
    }

    /// Vendor-name generation yields a non-empty string.
    #[test]
    fn test_generate_vendor_name() {
        let mut rng = ChaCha8Rng::seed_from_u64(42);
        let generator = RealismGenerator::new();
        let name = generator.generate_vendor_name(SpendCategory::OfficeSupplies, &mut rng);
        assert!(!name.is_empty());
    }

    /// With abbreviations forced on and typos off, the output should still be
    /// recognisable as the input or an abbreviated form of it.
    #[test]
    fn test_vary_description() {
        let mut rng = ChaCha8Rng::seed_from_u64(42);
        let config = RealismConfig {
            abbreviation_rate: 1.0,
            typo_rate: 0.0,
            ..Default::default()
        };
        let generator = RealismGenerator::with_config(config);
        let varied = generator.vary_description("Invoice for Purchase Order", &mut rng);
        // NOTE(review): the unmodified input already contains "Inv", so this
        // check cannot tell an abbreviated result from an untouched one.
        // Tighten it once the variator's exact abbreviations are confirmed.
        assert!(
            varied.contains("Inv")
                || varied.contains("PO")
                || varied == "Invoice for Purchase Order"
        );
    }

    /// Disabling `description_variations` must return the input verbatim,
    /// regardless of the configured rates.
    #[test]
    fn test_vary_description_disabled_returns_input() {
        let mut rng = ChaCha8Rng::seed_from_u64(42);
        let config = RealismConfig {
            description_variations: false,
            abbreviation_rate: 1.0,
            ..Default::default()
        };
        let generator = RealismGenerator::with_config(config);
        let original = "Invoice for Purchase Order";
        assert_eq!(generator.vary_description(original, &mut rng), original);
    }
}