Skip to main content

datasynth_core/templates/realism/
user_ids.rs

//! Realistic corporate user ID and email address generation.
//!
//! Generates user IDs in various corporate patterns, including name-based
//! employee IDs, employee numbers, and system, admin, and interface accounts,
//! as well as corporate email addresses.
5
6use rand::seq::IndexedRandom;
7use rand::Rng;
8use serde::{Deserialize, Serialize};
9
10/// User ID pattern types.
11#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
12#[serde(rename_all = "snake_case")]
13pub enum UserIdPattern {
14    /// First initial + last name + disambiguator (e.g., JSMITH001)
15    #[default]
16    InitialLastName,
17    /// First name + last name with dot (e.g., john.smith)
18    DotSeparated,
19    /// First name + underscore + last name (e.g., john_smith)
20    UnderscoreSeparated,
21    /// Last name + first initial (e.g., smithj)
22    LastNameInitial,
23    /// Employee number format (e.g., E00012345)
24    EmployeeNumber,
25    /// System account format (e.g., SVC_BATCH)
26    SystemAccount,
27    /// Admin account format (e.g., admin_gl)
28    AdminAccount,
29    /// Interface account format (e.g., INT_SAP)
30    InterfaceAccount,
31}
32
33/// User ID generator with multiple pattern support.
34#[derive(Debug, Clone)]
35pub struct UserIdGenerator {
36    default_pattern: UserIdPattern,
37    system_prefixes: Vec<&'static str>,
38    admin_prefixes: Vec<&'static str>,
39    interface_prefixes: Vec<&'static str>,
40    system_suffixes: Vec<&'static str>,
41}
42
43impl Default for UserIdGenerator {
44    fn default() -> Self {
45        Self::new()
46    }
47}
48
49impl UserIdGenerator {
50    /// Create a new user ID generator.
51    pub fn new() -> Self {
52        Self {
53            default_pattern: UserIdPattern::InitialLastName,
54            system_prefixes: vec!["SVC_", "SYS_", "BATCH_", "AUTO_", "SCHED_"],
55            admin_prefixes: vec!["admin_", "ADMIN_", "adm_", "root_"],
56            interface_prefixes: vec!["INT_", "IF_", "INTF_", "API_", "EDI_"],
57            system_suffixes: vec![
58                "BATCH",
59                "PROCESS",
60                "RECON",
61                "IMPORT",
62                "EXPORT",
63                "SYNC",
64                "SCHEDULER",
65                "MONITOR",
66                "BACKUP",
67                "ARCHIVE",
68                "CLEANUP",
69                "POSTING",
70                "INTERFACE",
71            ],
72        }
73    }
74
75    /// Generate a user ID using the default pattern.
76    pub fn generate(
77        &self,
78        first_name: &str,
79        last_name: &str,
80        index: usize,
81        rng: &mut impl Rng,
82    ) -> String {
83        self.generate_with_pattern(first_name, last_name, index, self.default_pattern, rng)
84    }
85
86    /// Generate a user ID with a specific pattern.
87    pub fn generate_with_pattern(
88        &self,
89        first_name: &str,
90        last_name: &str,
91        index: usize,
92        pattern: UserIdPattern,
93        rng: &mut impl Rng,
94    ) -> String {
95        match pattern {
96            UserIdPattern::InitialLastName => self.initial_last_name(first_name, last_name, index),
97            UserIdPattern::DotSeparated => self.dot_separated(first_name, last_name, index),
98            UserIdPattern::UnderscoreSeparated => {
99                self.underscore_separated(first_name, last_name, index)
100            }
101            UserIdPattern::LastNameInitial => self.last_name_initial(first_name, last_name, index),
102            UserIdPattern::EmployeeNumber => self.employee_number(index),
103            UserIdPattern::SystemAccount => self.system_account(rng),
104            UserIdPattern::AdminAccount => self.admin_account(rng),
105            UserIdPattern::InterfaceAccount => self.interface_account(rng),
106        }
107    }
108
109    /// Generate a random pattern user ID.
110    pub fn generate_random_pattern(
111        &self,
112        first_name: &str,
113        last_name: &str,
114        index: usize,
115        rng: &mut impl Rng,
116    ) -> String {
117        let pattern = self.select_pattern(rng);
118        self.generate_with_pattern(first_name, last_name, index, pattern, rng)
119    }
120
121    /// Generate a system account ID.
122    pub fn generate_system_account(&self, rng: &mut impl Rng) -> String {
123        self.system_account(rng)
124    }
125
126    /// Generate an admin account ID.
127    pub fn generate_admin_account(&self, rng: &mut impl Rng) -> String {
128        self.admin_account(rng)
129    }
130
131    /// Generate an interface account ID.
132    pub fn generate_interface_account(&self, system_name: &str) -> String {
133        format!("INT_{}", system_name.to_uppercase())
134    }
135
136    fn select_pattern(&self, rng: &mut impl Rng) -> UserIdPattern {
137        let roll: f64 = rng.random();
138        if roll < 0.40 {
139            UserIdPattern::InitialLastName
140        } else if roll < 0.65 {
141            UserIdPattern::DotSeparated
142        } else if roll < 0.80 {
143            UserIdPattern::LastNameInitial
144        } else if roll < 0.90 {
145            UserIdPattern::UnderscoreSeparated
146        } else {
147            UserIdPattern::EmployeeNumber
148        }
149    }
150
151    fn initial_last_name(&self, first_name: &str, last_name: &str, index: usize) -> String {
152        let first_initial = first_name
153            .chars()
154            .next()
155            .unwrap_or('X')
156            .to_ascii_uppercase();
157        let last_part: String = last_name
158            .chars()
159            .filter(|c| c.is_ascii_alphabetic())
160            .take(7)
161            .collect::<String>()
162            .to_uppercase();
163
164        if index == 0 {
165            format!("{}{}", first_initial, last_part)
166        } else {
167            format!("{}{}{}", first_initial, last_part, index)
168        }
169    }
170
171    fn dot_separated(&self, first_name: &str, last_name: &str, index: usize) -> String {
172        let first: String = first_name
173            .chars()
174            .filter(|c| c.is_ascii_alphabetic())
175            .collect::<String>()
176            .to_lowercase();
177        let last: String = last_name
178            .chars()
179            .filter(|c| c.is_ascii_alphabetic())
180            .collect::<String>()
181            .to_lowercase();
182
183        if index == 0 {
184            format!("{}.{}", first, last)
185        } else {
186            format!("{}.{}{}", first, last, index)
187        }
188    }
189
190    fn underscore_separated(&self, first_name: &str, last_name: &str, index: usize) -> String {
191        let first: String = first_name
192            .chars()
193            .filter(|c| c.is_ascii_alphabetic())
194            .collect::<String>()
195            .to_lowercase();
196        let last: String = last_name
197            .chars()
198            .filter(|c| c.is_ascii_alphabetic())
199            .collect::<String>()
200            .to_lowercase();
201
202        if index == 0 {
203            format!("{}_{}", first, last)
204        } else {
205            format!("{}_{}{}", first, last, index)
206        }
207    }
208
209    fn last_name_initial(&self, first_name: &str, last_name: &str, index: usize) -> String {
210        let last: String = last_name
211            .chars()
212            .filter(|c| c.is_ascii_alphabetic())
213            .take(8)
214            .collect::<String>()
215            .to_lowercase();
216        let first_initial = first_name
217            .chars()
218            .next()
219            .unwrap_or('x')
220            .to_ascii_lowercase();
221
222        if index == 0 {
223            format!("{}{}", last, first_initial)
224        } else {
225            format!("{}{}{}", last, first_initial, index)
226        }
227    }
228
229    fn employee_number(&self, index: usize) -> String {
230        format!("E{:08}", index)
231    }
232
233    fn system_account(&self, rng: &mut impl Rng) -> String {
234        let prefix = self.system_prefixes.choose(rng).unwrap_or(&"SVC_");
235        let suffix = self.system_suffixes.choose(rng).unwrap_or(&"BATCH");
236        format!("{}{}", prefix, suffix)
237    }
238
239    fn admin_account(&self, rng: &mut impl Rng) -> String {
240        let prefix = self.admin_prefixes.choose(rng).unwrap_or(&"admin_");
241        let systems = ["gl", "ap", "ar", "fa", "mm", "sd", "fi", "co", "hr", "pm"];
242        let system = systems.choose(rng).unwrap_or(&"gl");
243        format!("{}{}", prefix, system)
244    }
245
246    fn interface_account(&self, rng: &mut impl Rng) -> String {
247        let prefix = self.interface_prefixes.choose(rng).unwrap_or(&"INT_");
248        let systems = [
249            "SAP",
250            "ORACLE",
251            "SALESFORCE",
252            "WORKDAY",
253            "NETSUITE",
254            "DYNAMICS",
255            "SAGE",
256            "QUICKBOOKS",
257            "CONCUR",
258            "COUPA",
259            "ARIBA",
260            "BLACKLINE",
261            "HYPERION",
262            "ANAPLAN",
263        ];
264        let system = systems.choose(rng).unwrap_or(&"SAP");
265        format!("{}{}", prefix, system)
266    }
267}
268
269/// Email generator with corporate patterns.
270#[derive(Debug, Clone)]
271pub struct EmailGenerator {
272    domain: String,
273    patterns: Vec<EmailPattern>,
274}
275
/// Layouts for the local part of a generated email address.
#[derive(Debug, Clone, Copy)]
enum EmailPattern {
    /// `first.last`
    FirstDotLast,
    /// First initial immediately followed by the last name, e.g. `jsmith`.
    FirstInitialLast,
    /// `first_last`
    FirstUnderscoreLast,
    /// `last.first`
    LastDotFirst,
    /// First name only.
    FirstOnly,
}
284
285impl Default for EmailGenerator {
286    fn default() -> Self {
287        Self::new("company.com")
288    }
289}
290
291impl EmailGenerator {
292    /// Create a new email generator with the specified domain.
293    pub fn new(domain: &str) -> Self {
294        Self {
295            domain: domain.to_string(),
296            patterns: vec![
297                EmailPattern::FirstDotLast,
298                EmailPattern::FirstDotLast,
299                EmailPattern::FirstDotLast, // Weight toward common pattern
300                EmailPattern::FirstInitialLast,
301                EmailPattern::FirstUnderscoreLast,
302                EmailPattern::LastDotFirst,
303                EmailPattern::FirstOnly,
304            ],
305        }
306    }
307
308    /// Set the email domain.
309    pub fn with_domain(mut self, domain: &str) -> Self {
310        self.domain = domain.to_string();
311        self
312    }
313
314    /// Generate an email address.
315    pub fn generate(&self, first_name: &str, last_name: &str, rng: &mut impl Rng) -> String {
316        let pattern = self
317            .patterns
318            .choose(rng)
319            .unwrap_or(&EmailPattern::FirstDotLast);
320        self.generate_with_pattern(first_name, last_name, *pattern)
321    }
322
323    /// Generate an email with a specific pattern.
324    fn generate_with_pattern(
325        &self,
326        first_name: &str,
327        last_name: &str,
328        pattern: EmailPattern,
329    ) -> String {
330        let first = self.sanitize_for_email(first_name);
331        let last = self.sanitize_for_email(last_name);
332
333        let local_part = match pattern {
334            EmailPattern::FirstDotLast => format!("{}.{}", first, last),
335            EmailPattern::FirstInitialLast => {
336                let initial = first.chars().next().unwrap_or('x');
337                format!("{}{}", initial, last)
338            }
339            EmailPattern::FirstUnderscoreLast => format!("{}_{}", first, last),
340            EmailPattern::LastDotFirst => format!("{}.{}", last, first),
341            EmailPattern::FirstOnly => first,
342        };
343
344        format!("{}@{}", local_part, self.domain)
345    }
346
347    /// Generate a generic/functional email address.
348    pub fn generate_functional(&self, function: &str) -> String {
349        format!("{}@{}", function.to_lowercase(), self.domain)
350    }
351
352    fn sanitize_for_email(&self, name: &str) -> String {
353        name.chars()
354            .filter(|c| c.is_ascii_alphabetic())
355            .collect::<String>()
356            .to_lowercase()
357    }
358}
359
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    //! Unit tests for the user ID and email generators.
    //!
    //! Locals are named `generator` rather than `gen`, because `gen` is a reserved
    //! keyword starting with the Rust 2024 edition.

    use super::*;
    use rand::SeedableRng;
    use rand_chacha::ChaCha8Rng;

    /// Deterministic RNG shared by all tests.
    fn seeded_rng() -> ChaCha8Rng {
        ChaCha8Rng::seed_from_u64(42)
    }

    #[test]
    fn test_initial_last_name_pattern() {
        let mut rng = seeded_rng();
        let generator = UserIdGenerator::new();

        let id = generator.generate_with_pattern(
            "John",
            "Smith",
            0,
            UserIdPattern::InitialLastName,
            &mut rng,
        );
        assert_eq!(id, "JSMITH");

        let id2 = generator.generate_with_pattern(
            "John",
            "Smith",
            5,
            UserIdPattern::InitialLastName,
            &mut rng,
        );
        assert_eq!(id2, "JSMITH5");
    }

    #[test]
    fn test_dot_separated_pattern() {
        let mut rng = seeded_rng();
        let generator = UserIdGenerator::new();

        let id = generator.generate_with_pattern(
            "John",
            "Smith",
            0,
            UserIdPattern::DotSeparated,
            &mut rng,
        );
        assert_eq!(id, "john.smith");

        let id2 = generator.generate_with_pattern(
            "John",
            "Smith",
            3,
            UserIdPattern::DotSeparated,
            &mut rng,
        );
        assert_eq!(id2, "john.smith3");
    }

    #[test]
    fn test_underscore_separated_pattern() {
        let mut rng = seeded_rng();
        let generator = UserIdGenerator::new();

        let id = generator.generate_with_pattern(
            "John",
            "Smith",
            0,
            UserIdPattern::UnderscoreSeparated,
            &mut rng,
        );
        assert_eq!(id, "john_smith");

        let id2 = generator.generate_with_pattern(
            "John",
            "Smith",
            2,
            UserIdPattern::UnderscoreSeparated,
            &mut rng,
        );
        assert_eq!(id2, "john_smith2");
    }

    #[test]
    fn test_last_name_initial_pattern() {
        let mut rng = seeded_rng();
        let generator = UserIdGenerator::new();

        let id = generator.generate_with_pattern(
            "John",
            "Smith",
            0,
            UserIdPattern::LastNameInitial,
            &mut rng,
        );
        assert_eq!(id, "smithj");

        let id2 = generator.generate_with_pattern(
            "John",
            "Smith",
            7,
            UserIdPattern::LastNameInitial,
            &mut rng,
        );
        assert_eq!(id2, "smithj7");
    }

    #[test]
    fn test_long_last_name_is_truncated() {
        let mut rng = seeded_rng();
        let generator = UserIdGenerator::new();

        // InitialLastName keeps at most seven last-name letters.
        let id = generator.generate_with_pattern(
            "Anna",
            "Featherstonehaugh",
            0,
            UserIdPattern::InitialLastName,
            &mut rng,
        );
        assert_eq!(id, "AFEATHER");

        // LastNameInitial keeps at most eight last-name letters.
        let id2 = generator.generate_with_pattern(
            "Anna",
            "Featherstonehaugh",
            0,
            UserIdPattern::LastNameInitial,
            &mut rng,
        );
        assert_eq!(id2, "feathersa");
    }

    #[test]
    fn test_employee_number_pattern() {
        let mut rng = seeded_rng();
        let generator = UserIdGenerator::new();

        let id = generator.generate_with_pattern(
            "John",
            "Smith",
            12345,
            UserIdPattern::EmployeeNumber,
            &mut rng,
        );
        assert_eq!(id, "E00012345");
    }

    #[test]
    fn test_system_account() {
        let mut rng = seeded_rng();
        let generator = UserIdGenerator::new();

        let id = generator.generate_system_account(&mut rng);
        let prefixes = ["SVC_", "SYS_", "BATCH_", "AUTO_", "SCHED_"];
        assert!(
            prefixes.iter().any(|prefix| id.starts_with(prefix)),
            "unexpected system account: {}",
            id
        );
    }

    #[test]
    fn test_admin_account() {
        let mut rng = seeded_rng();
        let generator = UserIdGenerator::new();

        let id = generator.generate_admin_account(&mut rng);
        let prefixes = ["admin_", "ADMIN_", "adm_", "root_"];
        assert!(
            prefixes.iter().any(|prefix| id.starts_with(prefix)),
            "unexpected admin account: {}",
            id
        );
    }

    #[test]
    fn test_interface_account() {
        let generator = UserIdGenerator::new();
        let id = generator.generate_interface_account("SAP");
        assert_eq!(id, "INT_SAP");
    }

    #[test]
    fn test_email_generation() {
        let mut rng = seeded_rng();
        let generator = EmailGenerator::new("acme.com");

        let email = generator.generate("John", "Smith", &mut rng);

        // Whatever layout the RNG picks, the address must be one of these five.
        let expected = [
            "john.smith@acme.com",
            "jsmith@acme.com",
            "john_smith@acme.com",
            "smith.john@acme.com",
            "john@acme.com",
        ];
        assert!(
            expected.contains(&email.as_str()),
            "unexpected address: {}",
            email
        );
    }

    #[test]
    fn test_email_with_non_ascii() {
        let mut rng = seeded_rng();
        let generator = EmailGenerator::new("company.de");

        let email = generator.generate("Jürgen", "Müller", &mut rng);
        assert!(email.ends_with("@company.de"));
        // Non-ASCII should be filtered out
        assert!(!email.contains('ü'));
        assert!(email.is_ascii());
    }

    #[test]
    fn test_functional_email() {
        let generator = EmailGenerator::new("company.com");
        let email = generator.generate_functional("accounts.payable");
        assert_eq!(email, "accounts.payable@company.com");
    }

    #[test]
    fn test_random_pattern_variety() {
        let mut rng = seeded_rng();
        let generator = UserIdGenerator::new();

        let ids: std::collections::HashSet<String> = (0..100)
            .map(|i| generator.generate_random_pattern("John", "Smith", i, &mut rng))
            .collect();

        // Should generate diverse IDs
        assert!(ids.len() > 10);
    }
}