// datasynth_core/templates/realism/user_ids.rs

//! Realistic corporate user ID generation.
//!
//! Generates user IDs in various corporate patterns including standard
//! employee IDs, system accounts, and service accounts. Also provides
//! [`EmailGenerator`] for corporate-style email addresses.

use rand::seq::SliceRandom;
use rand::Rng;
use serde::{Deserialize, Serialize};

10/// User ID pattern types.
11#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
12#[serde(rename_all = "snake_case")]
13pub enum UserIdPattern {
14    /// First initial + last name + disambiguator (e.g., JSMITH001)
15    #[default]
16    InitialLastName,
17    /// First name + last name with dot (e.g., john.smith)
18    DotSeparated,
19    /// First name + underscore + last name (e.g., john_smith)
20    UnderscoreSeparated,
21    /// Last name + first initial (e.g., smithj)
22    LastNameInitial,
23    /// Employee number format (e.g., E00012345)
24    EmployeeNumber,
25    /// System account format (e.g., SVC_BATCH)
26    SystemAccount,
27    /// Admin account format (e.g., admin_gl)
28    AdminAccount,
29    /// Interface account format (e.g., INT_SAP)
30    InterfaceAccount,
31}
32
33/// User ID generator with multiple pattern support.
34#[derive(Debug, Clone)]
35pub struct UserIdGenerator {
36    default_pattern: UserIdPattern,
37    system_prefixes: Vec<&'static str>,
38    admin_prefixes: Vec<&'static str>,
39    interface_prefixes: Vec<&'static str>,
40    system_suffixes: Vec<&'static str>,
41}
42
43impl Default for UserIdGenerator {
44    fn default() -> Self {
45        Self::new()
46    }
47}
48
49impl UserIdGenerator {
50    /// Create a new user ID generator.
51    pub fn new() -> Self {
52        Self {
53            default_pattern: UserIdPattern::InitialLastName,
54            system_prefixes: vec!["SVC_", "SYS_", "BATCH_", "AUTO_", "SCHED_"],
55            admin_prefixes: vec!["admin_", "ADMIN_", "adm_", "root_"],
56            interface_prefixes: vec!["INT_", "IF_", "INTF_", "API_", "EDI_"],
57            system_suffixes: vec![
58                "BATCH",
59                "PROCESS",
60                "RECON",
61                "IMPORT",
62                "EXPORT",
63                "SYNC",
64                "SCHEDULER",
65                "MONITOR",
66                "BACKUP",
67                "ARCHIVE",
68                "CLEANUP",
69                "POSTING",
70                "INTERFACE",
71            ],
72        }
73    }
74
75    /// Generate a user ID using the default pattern.
76    pub fn generate(
77        &self,
78        first_name: &str,
79        last_name: &str,
80        index: usize,
81        rng: &mut impl Rng,
82    ) -> String {
83        self.generate_with_pattern(first_name, last_name, index, self.default_pattern, rng)
84    }
85
86    /// Generate a user ID with a specific pattern.
87    pub fn generate_with_pattern(
88        &self,
89        first_name: &str,
90        last_name: &str,
91        index: usize,
92        pattern: UserIdPattern,
93        rng: &mut impl Rng,
94    ) -> String {
95        match pattern {
96            UserIdPattern::InitialLastName => self.initial_last_name(first_name, last_name, index),
97            UserIdPattern::DotSeparated => self.dot_separated(first_name, last_name, index),
98            UserIdPattern::UnderscoreSeparated => {
99                self.underscore_separated(first_name, last_name, index)
100            }
101            UserIdPattern::LastNameInitial => self.last_name_initial(first_name, last_name, index),
102            UserIdPattern::EmployeeNumber => self.employee_number(index),
103            UserIdPattern::SystemAccount => self.system_account(rng),
104            UserIdPattern::AdminAccount => self.admin_account(rng),
105            UserIdPattern::InterfaceAccount => self.interface_account(rng),
106        }
107    }
108
109    /// Generate a random pattern user ID.
110    pub fn generate_random_pattern(
111        &self,
112        first_name: &str,
113        last_name: &str,
114        index: usize,
115        rng: &mut impl Rng,
116    ) -> String {
117        let pattern = self.select_pattern(rng);
118        self.generate_with_pattern(first_name, last_name, index, pattern, rng)
119    }
120
121    /// Generate a system account ID.
122    pub fn generate_system_account(&self, rng: &mut impl Rng) -> String {
123        self.system_account(rng)
124    }
125
126    /// Generate an admin account ID.
127    pub fn generate_admin_account(&self, rng: &mut impl Rng) -> String {
128        self.admin_account(rng)
129    }
130
131    /// Generate an interface account ID.
132    pub fn generate_interface_account(&self, system_name: &str) -> String {
133        format!("INT_{}", system_name.to_uppercase())
134    }
135
136    fn select_pattern(&self, rng: &mut impl Rng) -> UserIdPattern {
137        let roll: f64 = rng.gen();
138        if roll < 0.40 {
139            UserIdPattern::InitialLastName
140        } else if roll < 0.65 {
141            UserIdPattern::DotSeparated
142        } else if roll < 0.80 {
143            UserIdPattern::LastNameInitial
144        } else if roll < 0.90 {
145            UserIdPattern::UnderscoreSeparated
146        } else {
147            UserIdPattern::EmployeeNumber
148        }
149    }
150
151    fn initial_last_name(&self, first_name: &str, last_name: &str, index: usize) -> String {
152        let first_initial = first_name
153            .chars()
154            .next()
155            .unwrap_or('X')
156            .to_ascii_uppercase();
157        let last_part: String = last_name
158            .chars()
159            .filter(|c| c.is_ascii_alphabetic())
160            .take(7)
161            .collect::<String>()
162            .to_uppercase();
163
164        if index == 0 {
165            format!("{}{}", first_initial, last_part)
166        } else {
167            format!("{}{}{}", first_initial, last_part, index)
168        }
169    }
170
171    fn dot_separated(&self, first_name: &str, last_name: &str, index: usize) -> String {
172        let first: String = first_name
173            .chars()
174            .filter(|c| c.is_ascii_alphabetic())
175            .collect::<String>()
176            .to_lowercase();
177        let last: String = last_name
178            .chars()
179            .filter(|c| c.is_ascii_alphabetic())
180            .collect::<String>()
181            .to_lowercase();
182
183        if index == 0 {
184            format!("{}.{}", first, last)
185        } else {
186            format!("{}.{}{}", first, last, index)
187        }
188    }
189
190    fn underscore_separated(&self, first_name: &str, last_name: &str, index: usize) -> String {
191        let first: String = first_name
192            .chars()
193            .filter(|c| c.is_ascii_alphabetic())
194            .collect::<String>()
195            .to_lowercase();
196        let last: String = last_name
197            .chars()
198            .filter(|c| c.is_ascii_alphabetic())
199            .collect::<String>()
200            .to_lowercase();
201
202        if index == 0 {
203            format!("{}_{}", first, last)
204        } else {
205            format!("{}_{}{}", first, last, index)
206        }
207    }
208
209    fn last_name_initial(&self, first_name: &str, last_name: &str, index: usize) -> String {
210        let last: String = last_name
211            .chars()
212            .filter(|c| c.is_ascii_alphabetic())
213            .take(8)
214            .collect::<String>()
215            .to_lowercase();
216        let first_initial = first_name
217            .chars()
218            .next()
219            .unwrap_or('x')
220            .to_ascii_lowercase();
221
222        if index == 0 {
223            format!("{}{}", last, first_initial)
224        } else {
225            format!("{}{}{}", last, first_initial, index)
226        }
227    }
228
229    fn employee_number(&self, index: usize) -> String {
230        format!("E{:08}", index)
231    }
232
233    fn system_account(&self, rng: &mut impl Rng) -> String {
234        let prefix = self.system_prefixes.choose(rng).unwrap_or(&"SVC_");
235        let suffix = self.system_suffixes.choose(rng).unwrap_or(&"BATCH");
236        format!("{}{}", prefix, suffix)
237    }
238
239    fn admin_account(&self, rng: &mut impl Rng) -> String {
240        let prefix = self.admin_prefixes.choose(rng).unwrap_or(&"admin_");
241        let systems = ["gl", "ap", "ar", "fa", "mm", "sd", "fi", "co", "hr", "pm"];
242        let system = systems.choose(rng).unwrap_or(&"gl");
243        format!("{}{}", prefix, system)
244    }
245
246    fn interface_account(&self, rng: &mut impl Rng) -> String {
247        let prefix = self.interface_prefixes.choose(rng).unwrap_or(&"INT_");
248        let systems = [
249            "SAP",
250            "ORACLE",
251            "SALESFORCE",
252            "WORKDAY",
253            "NETSUITE",
254            "DYNAMICS",
255            "SAGE",
256            "QUICKBOOKS",
257            "CONCUR",
258            "COUPA",
259            "ARIBA",
260            "BLACKLINE",
261            "HYPERION",
262            "ANAPLAN",
263        ];
264        let system = systems.choose(rng).unwrap_or(&"SAP");
265        format!("{}{}", prefix, system)
266    }
267}
268
269/// Email generator with corporate patterns.
270#[derive(Debug, Clone)]
271pub struct EmailGenerator {
272    domain: String,
273    patterns: Vec<EmailPattern>,
274}
275
/// Layout of the local part (the text before `@`) of a generated address.
#[derive(Debug, Clone, Copy)]
// `LastDotFirst` and `FirstOnly` are supported by the formatter but are not
// part of the pattern pool built by `EmailGenerator::new`.
#[allow(dead_code)]
enum EmailPattern {
    /// `first.last`
    FirstDotLast,
    /// First initial followed by the last name, e.g. `jsmith`
    FirstInitialLast,
    /// `first_last`
    FirstUnderscoreLast,
    /// `last.first`
    LastDotFirst,
    /// `first`
    FirstOnly,
}

286impl Default for EmailGenerator {
287    fn default() -> Self {
288        Self::new("company.com")
289    }
290}
291
292impl EmailGenerator {
293    /// Create a new email generator with the specified domain.
294    pub fn new(domain: &str) -> Self {
295        Self {
296            domain: domain.to_string(),
297            patterns: vec![
298                EmailPattern::FirstDotLast,
299                EmailPattern::FirstDotLast,
300                EmailPattern::FirstDotLast, // Weight toward common pattern
301                EmailPattern::FirstInitialLast,
302                EmailPattern::FirstUnderscoreLast,
303            ],
304        }
305    }
306
307    /// Set the email domain.
308    pub fn with_domain(mut self, domain: &str) -> Self {
309        self.domain = domain.to_string();
310        self
311    }
312
313    /// Generate an email address.
314    pub fn generate(&self, first_name: &str, last_name: &str, rng: &mut impl Rng) -> String {
315        let pattern = self
316            .patterns
317            .choose(rng)
318            .unwrap_or(&EmailPattern::FirstDotLast);
319        self.generate_with_pattern(first_name, last_name, *pattern)
320    }
321
322    /// Generate an email with a specific pattern.
323    fn generate_with_pattern(
324        &self,
325        first_name: &str,
326        last_name: &str,
327        pattern: EmailPattern,
328    ) -> String {
329        let first = self.sanitize_for_email(first_name);
330        let last = self.sanitize_for_email(last_name);
331
332        let local_part = match pattern {
333            EmailPattern::FirstDotLast => format!("{}.{}", first, last),
334            EmailPattern::FirstInitialLast => {
335                let initial = first.chars().next().unwrap_or('x');
336                format!("{}{}", initial, last)
337            }
338            EmailPattern::FirstUnderscoreLast => format!("{}_{}", first, last),
339            EmailPattern::LastDotFirst => format!("{}.{}", last, first),
340            EmailPattern::FirstOnly => first,
341        };
342
343        format!("{}@{}", local_part, self.domain)
344    }
345
346    /// Generate a generic/functional email address.
347    pub fn generate_functional(&self, function: &str) -> String {
348        format!("{}@{}", function.to_lowercase(), self.domain)
349    }
350
351    fn sanitize_for_email(&self, name: &str) -> String {
352        name.chars()
353            .filter(|c| c.is_ascii_alphabetic())
354            .collect::<String>()
355            .to_lowercase()
356    }
357}
358
#[cfg(test)]
mod tests {
    use super::*;
    use rand::SeedableRng;
    use rand_chacha::ChaCha8Rng;

    /// Fixed-seed RNG so every run of a test sees the same random stream.
    fn seeded_rng() -> ChaCha8Rng {
        ChaCha8Rng::seed_from_u64(42)
    }

    #[test]
    fn test_initial_last_name_pattern() {
        let mut rng = seeded_rng();
        let generator = UserIdGenerator::new();

        let id = generator.generate_with_pattern(
            "John",
            "Smith",
            0,
            UserIdPattern::InitialLastName,
            &mut rng,
        );
        assert_eq!(id, "JSMITH");

        let id2 = generator.generate_with_pattern(
            "John",
            "Smith",
            5,
            UserIdPattern::InitialLastName,
            &mut rng,
        );
        assert_eq!(id2, "JSMITH5");
    }

    #[test]
    fn test_dot_separated_pattern() {
        let mut rng = seeded_rng();
        let generator = UserIdGenerator::new();

        let id = generator.generate_with_pattern(
            "John",
            "Smith",
            0,
            UserIdPattern::DotSeparated,
            &mut rng,
        );
        assert_eq!(id, "john.smith");

        let id2 = generator.generate_with_pattern(
            "John",
            "Smith",
            3,
            UserIdPattern::DotSeparated,
            &mut rng,
        );
        assert_eq!(id2, "john.smith3");
    }

    #[test]
    fn test_employee_number_pattern() {
        let mut rng = seeded_rng();
        let generator = UserIdGenerator::new();

        let id = generator.generate_with_pattern(
            "John",
            "Smith",
            12345,
            UserIdPattern::EmployeeNumber,
            &mut rng,
        );
        assert_eq!(id, "E00012345");
    }

    #[test]
    fn test_system_account() {
        let mut rng = seeded_rng();
        let generator = UserIdGenerator::new();

        let id = generator.generate_system_account(&mut rng);
        let prefixes = ["SVC_", "SYS_", "BATCH_", "AUTO_", "SCHED_"];
        assert!(
            prefixes.iter().any(|prefix| id.starts_with(*prefix)),
            "unexpected system account id: {}",
            id
        );
    }

    #[test]
    fn test_admin_account() {
        let mut rng = seeded_rng();
        let generator = UserIdGenerator::new();

        let id = generator.generate_admin_account(&mut rng);
        let prefixes = ["admin_", "ADMIN_", "adm_", "root_"];
        assert!(
            prefixes.iter().any(|prefix| id.starts_with(*prefix)),
            "unexpected admin account id: {}",
            id
        );
    }

    #[test]
    fn test_interface_account() {
        let generator = UserIdGenerator::new();
        let id = generator.generate_interface_account("SAP");
        assert_eq!(id, "INT_SAP");
    }

    #[test]
    fn test_email_generation() {
        let mut rng = seeded_rng();
        let generator = EmailGenerator::new("acme.com");

        let email = generator.generate("John", "Smith", &mut rng);
        // Whatever layout the RNG picks, it must be one from the pattern pool.
        let allowed = ["john.smith@acme.com", "jsmith@acme.com", "john_smith@acme.com"];
        assert!(
            allowed.contains(&email.as_str()),
            "unexpected email: {}",
            email
        );
    }

    #[test]
    fn test_email_with_non_ascii() {
        let mut rng = seeded_rng();
        let generator = EmailGenerator::new("company.de");

        let email = generator.generate("Jürgen", "Müller", &mut rng);
        assert!(email.ends_with("@company.de"));
        // Non-ASCII should be filtered out
        assert!(!email.contains('ü'));
        assert!(email.is_ascii());
    }

    #[test]
    fn test_functional_email() {
        let generator = EmailGenerator::new("company.com");
        let email = generator.generate_functional("accounts.payable");
        assert_eq!(email, "accounts.payable@company.com");
    }

    #[test]
    fn test_random_pattern_variety() {
        let mut rng = seeded_rng();
        let generator = UserIdGenerator::new();

        let mut ids = std::collections::HashSet::new();
        for i in 0..100usize {
            let id = generator.generate_random_pattern("John", "Smith", i, &mut rng);

            // Every draw must be one of the five personal layouts for this index.
            let suffix = if i == 0 { String::new() } else { i.to_string() };
            let expected = [
                format!("JSMITH{}", suffix),
                format!("john.smith{}", suffix),
                format!("smithj{}", suffix),
                format!("john_smith{}", suffix),
                format!("E{:08}", i),
            ];
            assert!(expected.contains(&id), "unexpected id: {}", id);

            ids.insert(id);
        }

        // Should generate diverse IDs
        assert!(ids.len() > 10);
    }
}