dataforge/generators/
name.rs

1//! 姓名生成器模块
2//! 
3//! 使用智能算法生成真实的中文和英文姓名,而不是简单的枚举列表
4
5use rand::Rng;
6use rand::seq::SliceRandom;
7
/// Pool of 50 common single-character Chinese surnames.
///
/// Entries are sampled uniformly by the surname generators in this module.
const ZH_SURNAME_CHARS: &[char] = &[
    '王', '李', '张', '刘', '陈',
    '杨', '赵', '黄', '周', '吴',
    '徐', '孙', '胡', '朱', '高',
    '林', '何', '郭', '马', '罗',
    '梁', '宋', '郑', '谢', '韩',
    '唐', '冯', '于', '董', '萧',
    '程', '曹', '袁', '邓', '许',
    '傅', '沈', '曾', '彭', '吕',
    '苏', '卢', '蒋', '蔡', '贾',
    '丁', '魏', '薛', '叶', '阎',
];
16
/// Given-name characters with a strong / positive connotation.
///
/// This is the first of several given-name pools grouped by meaning; the
/// male-leaning branch of the given-name generator prefers this pool.
const ZH_GIVEN_NAME_POSITIVE: &[char] = &[
    '伟', '强', '明', '华', '建',
    '文', '志', '勇', '军', '杰',
    '涛', '超', '辉', '鹏', '磊',
    '峰', '龙', '虎', '豪', '俊',
];
22
/// Given-name characters associated with virtues and moral qualities.
///
/// Used as the secondary pool for both gendered branches of the given-name
/// generator.
const ZH_GIVEN_NAME_VIRTUE: &[char] = &[
    '德', '仁', '义', '礼', '智',
    '信', '忠', '孝', '廉', '耻',
    '善', '美', '真', '诚', '正',
    '直', '清', '雅', '和', '谐',
];
27
/// Given-name characters drawn from nature (landscape, weather, sky).
const ZH_GIVEN_NAME_NATURE: &[char] = &[
    '山', '水', '林', '森', '海',
    '江', '河', '湖', '云', '雨',
    '雪', '风', '雷', '电', '日',
    '月', '星', '光', '火', '土',
];
32
/// Given-name characters preferred by the female-leaning branch of the
/// given-name generator.
const ZH_GIVEN_NAME_FEMALE: &[char] = &[
    '芳', '娜', '敏', '静', '丽',
    '艳', '娟', '秀', '美', '玲',
    '婷', '雯', '慧', '琳', '颖',
    '洁', '莉', '萍', '红', '燕',
];
37
/// Leading fragments used to assemble English surnames.
///
/// The list contains a repeated entry ("Mart"), so that fragment is
/// proportionally more likely under uniform sampling.
const EN_SURNAME_PREFIXES: &[&str] = &[
    "Sm", "John", "Will", "Br", "Jon", "Garc",
    "Mill", "Dav", "Rodr", "Mart", "Hern", "Lop",
    "Gonz", "Wils", "And", "Thom", "Tayl", "Moor",
    "Jack", "Mart", "Per", "Thomp", "Wh", "Harr",
    "Sanch", "Cl", "Ram", "Lew", "Rob",
];
45
/// Trailing fragments used to assemble English surnames.
///
/// Every entry is non-empty ASCII. Several entries repeat ("son", "ez",
/// "is"), which weights them more heavily under uniform sampling.
const EN_SURNAME_SUFFIXES: &[&str] = &[
    "ith", "son", "iams", "own", "es", "ia",
    "er", "is", "iguez", "inez", "andez", "ez",
    "alez", "on", "erson", "as", "or", "e",
    "son", "in", "ee", "ez", "son", "ite",
    "is", "ez", "ark", "irez", "is", "inson",
];
52
/// Leading fragments used to assemble English given names.
const EN_GIVEN_NAME_PREFIXES: &[&str] = &[
    "Jam", "Rob", "Joh", "Mich", "Dav", "Will",
    "Rich", "Thom", "Chr", "Char", "Dan", "Matt",
    "Ant", "Mar", "Don", "Stev", "Pau", "And",
    "Josh", "Ken", "Kev", "Br", "Geo", "Tim",
    "Ron", "Jas", "Edw", "Jeff", "Ry", "Jac",
    "Gar",
];
60
/// Trailing fragments used to assemble English given names.
///
/// Some entries repeat ("ald", "ard"), which weights them more heavily under
/// uniform sampling.
const EN_GIVEN_NAME_SUFFIXES: &[&str] = &[
    "es", "ert", "n", "ael", "id", "iam",
    "ard", "as", "istopher", "les", "iel", "hew",
    "hony", "k", "ald", "en", "l", "rew",
    "ua", "neth", "in", "ian", "rge", "othy",
    "ald", "on", "ard", "rey", "an", "ob",
    "y",
];
67
68/// 生成中文全名
69pub fn zh_cn_fullname() -> String {
70    let surname = zh_cn_surname();
71    let given_name = zh_cn_given_name();
72    format!("{}{}", surname, given_name)
73}
74
75/// 生成中文姓氏
76/// 使用智能算法:70%概率使用常见姓氏,30%概率生成稀有姓氏
77pub fn zh_cn_surname() -> String {
78    let mut rng = rand::thread_rng();
79    
80    if rng.gen_bool(0.7) {
81        // 70% 概率使用常见姓氏
82        let idx = rng.gen_range(0..ZH_SURNAME_CHARS.len());
83        ZH_SURNAME_CHARS[idx].to_string()
84    } else {
85        // 30% 概率生成稀有姓氏
86        generate_rare_zh_surname()
87    }
88}
89
90/// 生成稀有中文姓氏
91fn generate_rare_zh_surname() -> String {
92    let mut rng = rand::thread_rng();
93    
94    // 复姓概率 20%
95    if rng.gen_bool(0.2) {
96        // 生成复姓(如:欧阳、司马、上官等)
97        let first_char = generate_surname_char();
98        let second_char = generate_surname_char();
99        format!("{}{}", first_char, second_char)
100    } else {
101        // 生成单字稀有姓氏
102        generate_surname_char().to_string()
103    }
104}
105
106/// 生成适合作为姓氏的汉字
107fn generate_surname_char() -> char {
108    let mut rng = rand::thread_rng();
109    
110    // 使用汉字的特定Unicode范围,倾向于选择适合姓氏的字
111    let ranges = [
112        (0x4E00, 0x4E99), // 一些基础汉字
113        (0x5100, 0x5199), // 包含很多姓氏用字
114        (0x5200, 0x5299), // 更多姓氏相关字符
115    ];
116    
117    let range = ranges[rng.gen_range(0..ranges.len())];
118    let code = rng.gen_range(range.0..=range.1);
119    
120    std::char::from_u32(code).unwrap_or_else(|| {
121        // 如果生成失败,回退到常见姓氏
122        ZH_SURNAME_CHARS[rng.gen_range(0..ZH_SURNAME_CHARS.len())]
123    })
124}
125
126/// 生成中文名字
127/// 根据性别和寓意智能生成
128pub fn zh_cn_given_name() -> String {
129    zh_cn_given_name_with_gender(None)
130}
131
132/// 根据性别生成中文名字
133pub fn zh_cn_given_name_with_gender(gender: Option<Gender>) -> String {
134    let mut rng = rand::thread_rng();
135    let name_length = rng.gen_range(1..=2); // 1-2个字的名字
136    
137    let mut name = String::new();
138    
139    for i in 0..name_length {
140        let char = if i == 0 {
141            // 第一个字更注重寓意
142            generate_meaningful_char(gender)
143        } else {
144            // 第二个字可以更灵活
145            generate_complementary_char(gender)
146        };
147        name.push(char);
148    }
149    
150    name
151}
152
/// Gender hint accepted by the gender-aware name generators.
///
/// Besides `Clone`/`Copy`, the type derives `Debug`, `PartialEq`, `Eq` and
/// `Hash` so callers can log it, compare it, and use it as a map key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Gender {
    /// Bias generation towards male-leaning names.
    Male,
    /// Bias generation towards female-leaning names.
    Female,
}
159
160/// 生成有寓意的汉字
161fn generate_meaningful_char(gender: Option<Gender>) -> char {
162    let mut rng = rand::thread_rng();
163    
164    match gender {
165        Some(Gender::Female) => {
166            // 女性名字倾向于柔美字符
167            if rng.gen_bool(0.6) {
168                ZH_GIVEN_NAME_FEMALE[rng.gen_range(0..ZH_GIVEN_NAME_FEMALE.len())]
169            } else {
170                ZH_GIVEN_NAME_VIRTUE[rng.gen_range(0..ZH_GIVEN_NAME_VIRTUE.len())]
171            }
172        },
173        Some(Gender::Male) => {
174            // 男性名字倾向于阳刚字符
175            if rng.gen_bool(0.6) {
176                ZH_GIVEN_NAME_POSITIVE[rng.gen_range(0..ZH_GIVEN_NAME_POSITIVE.len())]
177            } else {
178                ZH_GIVEN_NAME_VIRTUE[rng.gen_range(0..ZH_GIVEN_NAME_VIRTUE.len())]
179            }
180        },
181        None => {
182            // 随机选择类别
183            let categories = [
184                &ZH_GIVEN_NAME_POSITIVE,
185                &ZH_GIVEN_NAME_VIRTUE,
186                &ZH_GIVEN_NAME_NATURE,
187                &ZH_GIVEN_NAME_FEMALE,
188            ];
189            let category = categories[rng.gen_range(0..categories.len())];
190            category[rng.gen_range(0..category.len())]
191        }
192    }
193}
194
195/// 生成互补的汉字
196fn generate_complementary_char(gender: Option<Gender>) -> char {
197    let mut rng = rand::thread_rng();
198    
199    // 30% 概率使用自然相关字符,70% 概率使用其他有意义字符
200    if rng.gen_bool(0.3) {
201        ZH_GIVEN_NAME_NATURE[rng.gen_range(0..ZH_GIVEN_NAME_NATURE.len())]
202    } else {
203        generate_meaningful_char(gender)
204    }
205}
206
207/// 生成英文全名
208pub fn en_us_fullname() -> String {
209    let given_name = en_us_given_name();
210    let surname = en_us_surname();
211    format!("{} {}", given_name, surname)
212}
213
214/// 生成简单的英文名字
215pub fn en_first_name() -> String {
216    en_us_given_name()
217}
218
219/// 生成简单的英文姓氏
220pub fn en_last_name() -> String {
221    en_us_surname()
222}
223
224/// 生成英文姓氏
225/// 使用音节组合算法生成真实感的英文姓氏
226pub fn en_us_surname() -> String {
227    let mut rng = rand::thread_rng();
228    
229    // 80% 概率生成组合姓氏,20% 概率生成简单姓氏
230    if rng.gen_bool(0.8) {
231        let prefix = EN_SURNAME_PREFIXES[rng.gen_range(0..EN_SURNAME_PREFIXES.len())];
232        let suffix = EN_SURNAME_SUFFIXES[rng.gen_range(0..EN_SURNAME_SUFFIXES.len())];
233        
234        // 避免重复音节
235        if prefix.to_lowercase().ends_with(&suffix.chars().next().unwrap().to_lowercase().to_string()) {
236            format!("{}{}", prefix, &suffix[1..])
237        } else {
238            format!("{}{}", prefix, suffix)
239        }
240    } else {
241        // 生成简单的单音节姓氏
242        generate_simple_english_surname()
243    }
244}
245
246/// 生成简单的英文姓氏
247fn generate_simple_english_surname() -> String {
248    let mut rng = rand::thread_rng();
249    let consonants = ['B', 'C', 'D', 'F', 'G', 'H', 'J', 'K', 'L', 'M', 'N', 'P', 'R', 'S', 'T', 'V', 'W'];
250    let vowels = ['a', 'e', 'i', 'o', 'u'];
251    
252    let mut surname = String::new();
253    
254    // 辅音开头
255    surname.push(consonants[rng.gen_range(0..consonants.len())]);
256    
257    // 添加元音
258    surname.push(vowels[rng.gen_range(0..vowels.len())]);
259    
260    // 可能添加辅音结尾
261    if rng.gen_bool(0.7) {
262        surname.push(consonants[rng.gen_range(0..consonants.len())].to_lowercase().next().unwrap());
263    }
264    
265    surname
266}
267
268/// 生成英文名字
269/// 使用音节组合生成自然的英文名字
270pub fn en_us_given_name() -> String {
271    en_us_given_name_with_gender(None)
272}
273
274/// 根据性别生成英文名字
275pub fn en_us_given_name_with_gender(gender: Option<Gender>) -> String {
276    let mut rng = rand::thread_rng();
277    
278    // 70% 概率使用音节组合,30% 概率生成创新名字
279    if rng.gen_bool(0.7) {
280        let prefix = EN_GIVEN_NAME_PREFIXES[rng.gen_range(0..EN_GIVEN_NAME_PREFIXES.len())];
281        let suffix = EN_GIVEN_NAME_SUFFIXES[rng.gen_range(0..EN_GIVEN_NAME_SUFFIXES.len())];
282        
283        let mut name = format!("{}{}", prefix, suffix);
284        
285        // 根据性别调整名字结尾
286        match gender {
287            Some(Gender::Female) => {
288                if rng.gen_bool(0.4) && !name.ends_with('a') && !name.ends_with('e') {
289                    name.push('a'); // 女性名字常以a结尾
290                }
291            },
292            Some(Gender::Male) => {
293                if name.ends_with('a') && rng.gen_bool(0.6) {
294                    name.pop(); // 男性名字较少以a结尾
295                }
296            },
297            None => {}
298        }
299        
300        name
301    } else {
302        generate_creative_english_name(gender)
303    }
304}
305
306/// 生成创新的英文名字
307fn generate_creative_english_name(gender: Option<Gender>) -> String {
308    let mut rng = rand::thread_rng();
309    let consonants = ['B', 'C', 'D', 'F', 'G', 'H', 'J', 'K', 'L', 'M', 'N', 'P', 'R', 'S', 'T', 'V', 'W', 'Z'];
310    let vowels = ['a', 'e', 'i', 'o', 'u'];
311    
312    let mut name = String::new();
313    let length = rng.gen_range(4..=8);
314    
315    for i in 0..length {
316        if i % 2 == 0 {
317            // 偶数位置放辅音
318            let consonant = consonants[rng.gen_range(0..consonants.len())];
319            if i == 0 {
320                name.push(consonant); // 首字母大写
321            } else {
322                name.push(consonant.to_lowercase().next().unwrap());
323            }
324        } else {
325            // 奇数位置放元音
326            name.push(vowels[rng.gen_range(0..vowels.len())]);
327        }
328    }
329    
330    // 根据性别调整结尾
331    match gender {
332        Some(Gender::Female) if !name.ends_with('a') && !name.ends_with('e') && rng.gen_bool(0.5) => {
333            name.push('a');
334        },
335        _ => {}
336    }
337    
338    name
339}
340
341/// 生成指定长度的中文姓名
342pub fn zh_cn_name_with_length(min_len: usize, max_len: usize) -> String {
343    let mut rng = rand::thread_rng();
344    let total_len = rng.gen_range(min_len..=max_len);
345    
346    let surname = zh_cn_surname();
347    let surname_len = surname.chars().count();
348    
349    if total_len <= surname_len {
350        return surname;
351    }
352    
353    let given_name_len = total_len - surname_len;
354    let mut given_name = String::new();
355    
356    for _ in 0..given_name_len {
357        given_name.push(generate_meaningful_char(None));
358    }
359    
360    format!("{}{}", surname, given_name)
361}
362
363/// 生成男性中文全名
364pub fn zh_cn_male_fullname() -> String {
365    let surname = zh_cn_surname();
366    let given_name = zh_cn_given_name_with_gender(Some(Gender::Male));
367    format!("{}{}", surname, given_name)
368}
369
370/// 生成女性中文全名
371pub fn zh_cn_female_fullname() -> String {
372    let surname = zh_cn_surname();
373    let given_name = zh_cn_given_name_with_gender(Some(Gender::Female));
374    format!("{}{}", surname, given_name)
375}
376
377/// 生成男性英文全名
378pub fn en_us_male_fullname() -> String {
379    let given_name = en_us_given_name_with_gender(Some(Gender::Male));
380    let surname = en_us_surname();
381    format!("{} {}", given_name, surname)
382}
383
384/// 生成女性英文全名
385pub fn en_us_female_fullname() -> String {
386    let given_name = en_us_given_name_with_gender(Some(Gender::Female));
387    let surname = en_us_surname();
388    format!("{} {}", given_name, surname)
389}
390
391/// 生成带中间名的英文全名
392pub fn en_us_fullname_with_middle() -> String {
393    let given_name = en_us_given_name();
394    let middle_name = en_us_given_name();
395    let surname = en_us_surname();
396    format!("{} {} {}", given_name, middle_name, surname)
397}
398
399/// 生成英文名字缩写
400pub fn en_us_initials() -> String {
401    let mut rng = rand::thread_rng();
402    let consonants = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z'];
403    
404    let first_initial = consonants[rng.gen_range(0..consonants.len())];
405    let last_initial = consonants[rng.gen_range(0..consonants.len())];
406    
407    format!("{}.{}.", first_initial, last_initial)
408}
409
410/// 生成中文姓氏(别名)
411pub fn zh_cn_lastname() -> String {
412    zh_cn_surname()
413}
414
415/// 生成中文名字(别名)
416pub fn zh_cn_firstname() -> String {
417    zh_cn_given_name()
418}
419
420/// 生成英文姓氏(别名)
421pub fn en_us_lastname() -> String {
422    en_us_surname()
423}
424
425/// 生成英文名字(别名)
426pub fn en_us_firstname() -> String {
427    en_us_given_name()
428}
429
430/// 生成中文公司名称
431pub fn zh_cn_company() -> String {
432    let mut rng = rand::thread_rng();
433    let prefixes = ["北京", "上海", "深圳", "广州", "杭州", "成都", "武汉", "西安"];
434    let types = ["科技", "信息", "网络", "软件", "数据", "智能", "创新", "发展"];
435    let suffixes = ["有限公司", "股份有限公司", "科技有限公司", "集团有限公司"];
436    
437    let prefix = prefixes.choose(&mut rng).unwrap();
438    let type_name = types.choose(&mut rng).unwrap();
439    let suffix = suffixes.choose(&mut rng).unwrap();
440    
441    format!("{}{}{}", prefix, type_name, suffix)
442}
443
444/// 生成日文姓名(平假名)
445pub fn ja_jp_fullname() -> String {
446    let mut rng = rand::thread_rng();
447    
448    // 日文姓氏音节
449    let surname_syllables = ["田", "中", "山", "木", "村", "井", "川", "原", "石", "小"];
450    let surname_endings = ["田", "中", "山", "木", "村", "井", "川", "原", "石", "野"];
451    
452    // 日文名字音节
453    let given_syllables = ["あ", "か", "さ", "た", "な", "は", "ま", "や", "ら", "わ"];
454    let given_endings = ["子", "美", "恵", "香", "代", "花", "愛", "里", "奈", "菜"];
455    
456    let surname = format!("{}{}", 
457        surname_syllables[rng.gen_range(0..surname_syllables.len())],
458        surname_endings[rng.gen_range(0..surname_endings.len())]
459    );
460    
461    let given_name = format!("{}{}", 
462        given_syllables[rng.gen_range(0..given_syllables.len())],
463        given_endings[rng.gen_range(0..given_endings.len())]
464    );
465    
466    format!("{} {}", surname, given_name)
467}
468
#[cfg(test)]
mod tests {
    use super::*;
    use std::collections::HashSet;
    use std::time::Instant;

    /// Number of Unicode scalar values in `s` (not bytes).
    fn char_len(s: &str) -> usize {
        s.chars().count()
    }

    /// Chinese full names are non-empty and 2 to 4 characters long.
    #[test]
    fn test_zh_cn_fullname() {
        for _ in 0..100 {
            let name = zh_cn_fullname();
            let len = char_len(&name);
            assert!(!name.is_empty());
            assert!(len >= 2);
            // At most four characters: compound surname + two-character given name.
            assert!(len <= 4);
        }
    }

    /// One hundred draws should yield more than 20 distinct surnames.
    #[test]
    fn test_zh_cn_surname_variety() {
        let surnames: HashSet<String> = (0..100).map(|_| zh_cn_surname()).collect();
        assert!(surnames.len() > 20);
    }

    /// Gendered Chinese full names are non-empty and at least two characters.
    #[test]
    fn test_zh_cn_gender_names() {
        let male_name = zh_cn_male_fullname();
        let female_name = zh_cn_female_fullname();

        for name in [&male_name, &female_name] {
            assert!(!name.is_empty());
        }
        for name in [&male_name, &female_name] {
            assert!(char_len(name) >= 2);
        }
    }

    /// English full names have exactly two capitalised, space-separated parts.
    #[test]
    fn test_en_us_fullname() {
        for _ in 0..50 {
            let name = en_us_fullname();
            assert!(!name.is_empty());
            assert!(name.contains(' '));

            // Given name + surname.
            let parts: Vec<&str> = name.split_whitespace().collect();
            assert_eq!(parts.len(), 2);

            // Each part starts with an uppercase letter.
            for part in &parts {
                assert!(part.chars().next().unwrap().is_uppercase());
            }
        }
    }

    /// One hundred draws should yield more than 30 distinct English surnames.
    #[test]
    fn test_en_us_surname_variety() {
        let surnames: HashSet<String> = (0..100).map(|_| en_us_surname()).collect();
        assert!(surnames.len() > 30);
    }

    /// Gendered English full names are non-empty and contain a space.
    #[test]
    fn test_en_us_gender_names() {
        let male_name = en_us_male_fullname();
        let female_name = en_us_female_fullname();

        for name in [&male_name, &female_name] {
            assert!(!name.is_empty());
        }
        for name in [&male_name, &female_name] {
            assert!(name.contains(' '));
        }
    }

    /// Length-constrained names honour both exact lengths and ranges.
    #[test]
    fn test_zh_cn_name_with_length() {
        for target_len in 2..=5 {
            let name = zh_cn_name_with_length(target_len, target_len);
            assert_eq!(char_len(&name), target_len);
        }

        // Range request.
        let ranged_len = char_len(&zh_cn_name_with_length(2, 4));
        assert!(ranged_len >= 2 && ranged_len <= 4);
    }

    /// A name with a middle name has exactly three parts.
    #[test]
    fn test_en_us_fullname_with_middle() {
        let name = en_us_fullname_with_middle();
        // Given name + middle name + surname.
        assert_eq!(name.split_whitespace().count(), 3);
    }

    /// Initials look like "A.B.": four bytes with exactly two periods.
    #[test]
    fn test_en_us_initials() {
        let initials = en_us_initials();
        assert_eq!(initials.len(), 4);
        assert!(initials.contains('.'));
        assert_eq!(initials.matches('.').count(), 2);
    }

    /// Japanese-style names consist of two space-separated parts.
    #[test]
    fn test_ja_jp_fullname() {
        let name = ja_jp_fullname();
        assert!(!name.is_empty());
        assert!(name.contains(' '));
        assert_eq!(name.split_whitespace().count(), 2);
    }

    /// Generating 1000 Chinese and 1000 English names takes under a second.
    #[test]
    fn test_name_generation_performance() {
        let started = Instant::now();
        for _ in 0..1000 {
            zh_cn_fullname();
            en_us_fullname();
        }
        let elapsed = started.elapsed();

        assert!(elapsed.as_millis() < 1000);
    }

    /// Out of 200 draws per locale, more than 150 names should be distinct.
    #[test]
    fn test_name_uniqueness() {
        let mut zh_names = HashSet::new();
        let mut en_names = HashSet::new();

        for _ in 0..200 {
            zh_names.insert(zh_cn_fullname());
            en_names.insert(en_us_fullname());
        }

        assert!(zh_names.len() > 150);
        assert!(en_names.len() > 150);
    }
}