// datasynth_core/templates/realism/user_ids.rs

//! Realistic corporate user ID generation.
//!
//! Generates user IDs in various corporate patterns, including standard
//! employee IDs, system accounts, and service accounts, as well as matching
//! corporate email addresses.
5
6use rand::seq::SliceRandom;
7use rand::Rng;
8use serde::{Deserialize, Serialize};
9
10/// User ID pattern types.
11#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
12#[serde(rename_all = "snake_case")]
13pub enum UserIdPattern {
14    /// First initial + last name + disambiguator (e.g., JSMITH001)
15    #[default]
16    InitialLastName,
17    /// First name + last name with dot (e.g., john.smith)
18    DotSeparated,
19    /// First name + underscore + last name (e.g., john_smith)
20    UnderscoreSeparated,
21    /// Last name + first initial (e.g., smithj)
22    LastNameInitial,
23    /// Employee number format (e.g., E00012345)
24    EmployeeNumber,
25    /// System account format (e.g., SVC_BATCH)
26    SystemAccount,
27    /// Admin account format (e.g., admin_gl)
28    AdminAccount,
29    /// Interface account format (e.g., INT_SAP)
30    InterfaceAccount,
31}
32
33/// User ID generator with multiple pattern support.
34#[derive(Debug, Clone)]
35pub struct UserIdGenerator {
36    default_pattern: UserIdPattern,
37    system_prefixes: Vec<&'static str>,
38    admin_prefixes: Vec<&'static str>,
39    interface_prefixes: Vec<&'static str>,
40    system_suffixes: Vec<&'static str>,
41}
42
43impl Default for UserIdGenerator {
44    fn default() -> Self {
45        Self::new()
46    }
47}
48
49impl UserIdGenerator {
50    /// Create a new user ID generator.
51    pub fn new() -> Self {
52        Self {
53            default_pattern: UserIdPattern::InitialLastName,
54            system_prefixes: vec!["SVC_", "SYS_", "BATCH_", "AUTO_", "SCHED_"],
55            admin_prefixes: vec!["admin_", "ADMIN_", "adm_", "root_"],
56            interface_prefixes: vec!["INT_", "IF_", "INTF_", "API_", "EDI_"],
57            system_suffixes: vec![
58                "BATCH",
59                "PROCESS",
60                "RECON",
61                "IMPORT",
62                "EXPORT",
63                "SYNC",
64                "SCHEDULER",
65                "MONITOR",
66                "BACKUP",
67                "ARCHIVE",
68                "CLEANUP",
69                "POSTING",
70                "INTERFACE",
71            ],
72        }
73    }
74
75    /// Generate a user ID using the default pattern.
76    pub fn generate(
77        &self,
78        first_name: &str,
79        last_name: &str,
80        index: usize,
81        rng: &mut impl Rng,
82    ) -> String {
83        self.generate_with_pattern(first_name, last_name, index, self.default_pattern, rng)
84    }
85
86    /// Generate a user ID with a specific pattern.
87    pub fn generate_with_pattern(
88        &self,
89        first_name: &str,
90        last_name: &str,
91        index: usize,
92        pattern: UserIdPattern,
93        rng: &mut impl Rng,
94    ) -> String {
95        match pattern {
96            UserIdPattern::InitialLastName => self.initial_last_name(first_name, last_name, index),
97            UserIdPattern::DotSeparated => self.dot_separated(first_name, last_name, index),
98            UserIdPattern::UnderscoreSeparated => {
99                self.underscore_separated(first_name, last_name, index)
100            }
101            UserIdPattern::LastNameInitial => self.last_name_initial(first_name, last_name, index),
102            UserIdPattern::EmployeeNumber => self.employee_number(index),
103            UserIdPattern::SystemAccount => self.system_account(rng),
104            UserIdPattern::AdminAccount => self.admin_account(rng),
105            UserIdPattern::InterfaceAccount => self.interface_account(rng),
106        }
107    }
108
109    /// Generate a random pattern user ID.
110    pub fn generate_random_pattern(
111        &self,
112        first_name: &str,
113        last_name: &str,
114        index: usize,
115        rng: &mut impl Rng,
116    ) -> String {
117        let pattern = self.select_pattern(rng);
118        self.generate_with_pattern(first_name, last_name, index, pattern, rng)
119    }
120
121    /// Generate a system account ID.
122    pub fn generate_system_account(&self, rng: &mut impl Rng) -> String {
123        self.system_account(rng)
124    }
125
126    /// Generate an admin account ID.
127    pub fn generate_admin_account(&self, rng: &mut impl Rng) -> String {
128        self.admin_account(rng)
129    }
130
131    /// Generate an interface account ID.
132    pub fn generate_interface_account(&self, system_name: &str) -> String {
133        format!("INT_{}", system_name.to_uppercase())
134    }
135
136    fn select_pattern(&self, rng: &mut impl Rng) -> UserIdPattern {
137        let roll: f64 = rng.gen();
138        if roll < 0.40 {
139            UserIdPattern::InitialLastName
140        } else if roll < 0.65 {
141            UserIdPattern::DotSeparated
142        } else if roll < 0.80 {
143            UserIdPattern::LastNameInitial
144        } else if roll < 0.90 {
145            UserIdPattern::UnderscoreSeparated
146        } else {
147            UserIdPattern::EmployeeNumber
148        }
149    }
150
151    fn initial_last_name(&self, first_name: &str, last_name: &str, index: usize) -> String {
152        let first_initial = first_name
153            .chars()
154            .next()
155            .unwrap_or('X')
156            .to_ascii_uppercase();
157        let last_part: String = last_name
158            .chars()
159            .filter(|c| c.is_ascii_alphabetic())
160            .take(7)
161            .collect::<String>()
162            .to_uppercase();
163
164        if index == 0 {
165            format!("{}{}", first_initial, last_part)
166        } else {
167            format!("{}{}{}", first_initial, last_part, index)
168        }
169    }
170
171    fn dot_separated(&self, first_name: &str, last_name: &str, index: usize) -> String {
172        let first: String = first_name
173            .chars()
174            .filter(|c| c.is_ascii_alphabetic())
175            .collect::<String>()
176            .to_lowercase();
177        let last: String = last_name
178            .chars()
179            .filter(|c| c.is_ascii_alphabetic())
180            .collect::<String>()
181            .to_lowercase();
182
183        if index == 0 {
184            format!("{}.{}", first, last)
185        } else {
186            format!("{}.{}{}", first, last, index)
187        }
188    }
189
190    fn underscore_separated(&self, first_name: &str, last_name: &str, index: usize) -> String {
191        let first: String = first_name
192            .chars()
193            .filter(|c| c.is_ascii_alphabetic())
194            .collect::<String>()
195            .to_lowercase();
196        let last: String = last_name
197            .chars()
198            .filter(|c| c.is_ascii_alphabetic())
199            .collect::<String>()
200            .to_lowercase();
201
202        if index == 0 {
203            format!("{}_{}", first, last)
204        } else {
205            format!("{}_{}{}", first, last, index)
206        }
207    }
208
209    fn last_name_initial(&self, first_name: &str, last_name: &str, index: usize) -> String {
210        let last: String = last_name
211            .chars()
212            .filter(|c| c.is_ascii_alphabetic())
213            .take(8)
214            .collect::<String>()
215            .to_lowercase();
216        let first_initial = first_name
217            .chars()
218            .next()
219            .unwrap_or('x')
220            .to_ascii_lowercase();
221
222        if index == 0 {
223            format!("{}{}", last, first_initial)
224        } else {
225            format!("{}{}{}", last, first_initial, index)
226        }
227    }
228
229    fn employee_number(&self, index: usize) -> String {
230        format!("E{:08}", index)
231    }
232
233    fn system_account(&self, rng: &mut impl Rng) -> String {
234        let prefix = self.system_prefixes.choose(rng).unwrap_or(&"SVC_");
235        let suffix = self.system_suffixes.choose(rng).unwrap_or(&"BATCH");
236        format!("{}{}", prefix, suffix)
237    }
238
239    fn admin_account(&self, rng: &mut impl Rng) -> String {
240        let prefix = self.admin_prefixes.choose(rng).unwrap_or(&"admin_");
241        let systems = ["gl", "ap", "ar", "fa", "mm", "sd", "fi", "co", "hr", "pm"];
242        let system = systems.choose(rng).unwrap_or(&"gl");
243        format!("{}{}", prefix, system)
244    }
245
246    fn interface_account(&self, rng: &mut impl Rng) -> String {
247        let prefix = self.interface_prefixes.choose(rng).unwrap_or(&"INT_");
248        let systems = [
249            "SAP",
250            "ORACLE",
251            "SALESFORCE",
252            "WORKDAY",
253            "NETSUITE",
254            "DYNAMICS",
255            "SAGE",
256            "QUICKBOOKS",
257            "CONCUR",
258            "COUPA",
259            "ARIBA",
260            "BLACKLINE",
261            "HYPERION",
262            "ANAPLAN",
263        ];
264        let system = systems.choose(rng).unwrap_or(&"SAP");
265        format!("{}{}", prefix, system)
266    }
267}
268
269/// Email generator with corporate patterns.
270#[derive(Debug, Clone)]
271pub struct EmailGenerator {
272    domain: String,
273    patterns: Vec<EmailPattern>,
274}
275
276#[derive(Debug, Clone, Copy)]
277#[allow(dead_code)]
278enum EmailPattern {
279    FirstDotLast,
280    FirstInitialLast,
281    FirstUnderscoreLast,
282    LastDotFirst,
283    FirstOnly,
284}
285
286impl Default for EmailGenerator {
287    fn default() -> Self {
288        Self::new("company.com")
289    }
290}
291
292impl EmailGenerator {
293    /// Create a new email generator with the specified domain.
294    pub fn new(domain: &str) -> Self {
295        Self {
296            domain: domain.to_string(),
297            patterns: vec![
298                EmailPattern::FirstDotLast,
299                EmailPattern::FirstDotLast,
300                EmailPattern::FirstDotLast, // Weight toward common pattern
301                EmailPattern::FirstInitialLast,
302                EmailPattern::FirstUnderscoreLast,
303            ],
304        }
305    }
306
307    /// Set the email domain.
308    pub fn with_domain(mut self, domain: &str) -> Self {
309        self.domain = domain.to_string();
310        self
311    }
312
313    /// Generate an email address.
314    pub fn generate(&self, first_name: &str, last_name: &str, rng: &mut impl Rng) -> String {
315        let pattern = self
316            .patterns
317            .choose(rng)
318            .unwrap_or(&EmailPattern::FirstDotLast);
319        self.generate_with_pattern(first_name, last_name, *pattern)
320    }
321
322    /// Generate an email with a specific pattern.
323    fn generate_with_pattern(
324        &self,
325        first_name: &str,
326        last_name: &str,
327        pattern: EmailPattern,
328    ) -> String {
329        let first = self.sanitize_for_email(first_name);
330        let last = self.sanitize_for_email(last_name);
331
332        let local_part = match pattern {
333            EmailPattern::FirstDotLast => format!("{}.{}", first, last),
334            EmailPattern::FirstInitialLast => {
335                let initial = first.chars().next().unwrap_or('x');
336                format!("{}{}", initial, last)
337            }
338            EmailPattern::FirstUnderscoreLast => format!("{}_{}", first, last),
339            EmailPattern::LastDotFirst => format!("{}.{}", last, first),
340            EmailPattern::FirstOnly => first,
341        };
342
343        format!("{}@{}", local_part, self.domain)
344    }
345
346    /// Generate a generic/functional email address.
347    pub fn generate_functional(&self, function: &str) -> String {
348        format!("{}@{}", function.to_lowercase(), self.domain)
349    }
350
351    fn sanitize_for_email(&self, name: &str) -> String {
352        name.chars()
353            .filter(|c| c.is_ascii_alphabetic())
354            .collect::<String>()
355            .to_lowercase()
356    }
357}
358
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;
    use rand::SeedableRng;
    use rand_chacha::ChaCha8Rng;

    /// Fixed-seed RNG so every run observes the same random sequence.
    fn seeded_rng() -> ChaCha8Rng {
        ChaCha8Rng::seed_from_u64(42)
    }

    #[test]
    fn test_initial_last_name_pattern() {
        let mut rng = seeded_rng();
        let ids = UserIdGenerator::new();

        let id =
            ids.generate_with_pattern("John", "Smith", 0, UserIdPattern::InitialLastName, &mut rng);
        assert_eq!(id, "JSMITH");

        let id2 =
            ids.generate_with_pattern("John", "Smith", 5, UserIdPattern::InitialLastName, &mut rng);
        assert_eq!(id2, "JSMITH5");
    }

    #[test]
    fn test_dot_separated_pattern() {
        let mut rng = seeded_rng();
        let ids = UserIdGenerator::new();

        let id =
            ids.generate_with_pattern("John", "Smith", 0, UserIdPattern::DotSeparated, &mut rng);
        assert_eq!(id, "john.smith");

        let id2 =
            ids.generate_with_pattern("John", "Smith", 3, UserIdPattern::DotSeparated, &mut rng);
        assert_eq!(id2, "john.smith3");
    }

    #[test]
    fn test_underscore_separated_pattern() {
        let mut rng = seeded_rng();
        let ids = UserIdGenerator::new();

        let id = ids.generate_with_pattern(
            "John",
            "Smith",
            0,
            UserIdPattern::UnderscoreSeparated,
            &mut rng,
        );
        assert_eq!(id, "john_smith");

        let id2 = ids.generate_with_pattern(
            "John",
            "Smith",
            7,
            UserIdPattern::UnderscoreSeparated,
            &mut rng,
        );
        assert_eq!(id2, "john_smith7");
    }

    #[test]
    fn test_last_name_initial_pattern() {
        let mut rng = seeded_rng();
        let ids = UserIdGenerator::new();

        let id =
            ids.generate_with_pattern("John", "Smith", 0, UserIdPattern::LastNameInitial, &mut rng);
        assert_eq!(id, "smithj");

        let id2 =
            ids.generate_with_pattern("John", "Smith", 2, UserIdPattern::LastNameInitial, &mut rng);
        assert_eq!(id2, "smithj2");
    }

    #[test]
    fn test_employee_number_pattern() {
        let mut rng = seeded_rng();
        let ids = UserIdGenerator::new();

        let id = ids.generate_with_pattern(
            "John",
            "Smith",
            12345,
            UserIdPattern::EmployeeNumber,
            &mut rng,
        );
        assert_eq!(id, "E00012345");
    }

    #[test]
    fn test_system_account() {
        let mut rng = seeded_rng();
        let ids = UserIdGenerator::new();

        let id = ids.generate_system_account(&mut rng);
        let prefixes = ["SVC_", "SYS_", "BATCH_", "AUTO_", "SCHED_"];
        assert!(
            prefixes.iter().any(|prefix| id.starts_with(*prefix)),
            "unexpected system account: {}",
            id
        );
    }

    #[test]
    fn test_admin_account() {
        let mut rng = seeded_rng();
        let ids = UserIdGenerator::new();

        let id = ids.generate_admin_account(&mut rng);
        let prefixes = ["admin_", "ADMIN_", "adm_", "root_"];
        assert!(
            prefixes.iter().any(|prefix| id.starts_with(*prefix)),
            "unexpected admin account: {}",
            id
        );
    }

    #[test]
    fn test_interface_account() {
        let ids = UserIdGenerator::new();
        let id = ids.generate_interface_account("SAP");
        assert_eq!(id, "INT_SAP");
    }

    #[test]
    fn test_email_generation() {
        let mut rng = seeded_rng();
        let emails = EmailGenerator::new("acme.com");

        let email = emails.generate("John", "Smith", &mut rng);
        // The default pattern pool can only produce these three local parts.
        let local_part = email.strip_suffix("@acme.com");
        assert!(
            matches!(
                local_part,
                Some("john.smith") | Some("jsmith") | Some("john_smith")
            ),
            "unexpected email: {}",
            email
        );
    }

    #[test]
    fn test_email_with_non_ascii() {
        let mut rng = seeded_rng();
        let emails = EmailGenerator::new("company.de");

        let email = emails.generate("Jürgen", "Müller", &mut rng);
        assert!(email.ends_with("@company.de"));
        // Non-ASCII should be filtered out
        assert!(!email.contains('ü'));
        assert!(email.is_ascii(), "unexpected email: {}", email);
    }

    #[test]
    fn test_functional_email() {
        let emails = EmailGenerator::new("company.com");
        let email = emails.generate_functional("accounts.payable");
        assert_eq!(email, "accounts.payable@company.com");
    }

    #[test]
    fn test_random_pattern_variety() {
        let mut rng = seeded_rng();
        let ids = UserIdGenerator::new();

        let generated: std::collections::HashSet<String> = (0..100)
            .map(|i| ids.generate_random_pattern("John", "Smith", i, &mut rng))
            .collect();

        // Should generate diverse IDs
        assert!(generated.len() > 10);
    }
}