1use once_cell::sync::Lazy;
4use lazy_static::lazy_static;
5use rand::prelude::SliceRandom;
6use rand::Rng;
7use serde_json::Value;
8use std::fs;
9
10static PROVINCES: Lazy<Vec<String>> = Lazy::new(|| {
12 load_data_from_file("data/regions/china/provinces.txt")
13});
14
15static STREET_SUFFIXES: Lazy<Vec<String>> = Lazy::new(|| {
17 load_data_from_file("data/regions/china/street_suffixes.txt")
18});
19
/// Loads a newline-separated word list from `file_path`.
///
/// Every line is trimmed and blank lines are discarded. A leading UTF-8
/// byte-order mark is ignored so it cannot end up glued to the first entry.
///
/// The function never fails: when the file cannot be read, or when it
/// contains no usable entries, a built-in default list for that path is
/// returned instead. The result is therefore never empty, so callers that
/// pick a random element do not have to deal with an empty list.
fn load_data_from_file(file_path: &str) -> Vec<String> {
    // A read error is deliberately treated the same as "no entries": this
    // loader is best-effort and the defaults below take over.
    let entries = fs::read_to_string(file_path)
        .map(|content| parse_data_lines(&content))
        .unwrap_or_default();

    if entries.is_empty() {
        builtin_fallback_data(file_path)
    } else {
        entries
    }
}

/// Splits `content` into trimmed, non-empty lines (after dropping any BOM).
fn parse_data_lines(content: &str) -> Vec<String> {
    content
        // `str::trim` does not remove U+FEFF, so strip it explicitly.
        .trim_start_matches('\u{feff}')
        .lines()
        .map(str::trim)
        .filter(|line| !line.is_empty())
        .map(str::to_owned)
        .collect()
}

/// Returns the hard-coded default list associated with `file_path`.
fn builtin_fallback_data(file_path: &str) -> Vec<String> {
    let defaults: &[&str] = match file_path {
        "data/regions/china/provinces.txt" => &[
            "北京市", "天津市", "河北省",
            "山西省", "内蒙古自治区", "辽宁省",
            "吉林省", "黑龙江省", "上海市",
            "江苏省", "浙江省", "安徽省",
        ],
        "data/regions/china/street_suffixes.txt" => &[
            "街", "路", "巷",
            "大道", "胡同", "里",
        ],
        _ => &["Default"],
    };
    defaults.iter().map(|entry| entry.to_string()).collect()
}
46
47lazy_static! {
48 static ref GB2312_RANGES: [(u32, u32); 72] = [
50 (0xB0A1, 0xB0FE), (0xB1A1, 0xB1FE), (0xB2A1, 0xB2FE), (0xB3A1, 0xB3FE), (0xB4A1, 0xB4FE), (0xB5A1, 0xB5FE), (0xB6A1, 0xB6FE), (0xB7A1, 0xB7FE), (0xB8A1, 0xB8FE), (0xB9A1, 0xB9FE), (0xBAA1, 0xBAFE), (0xBBA1, 0xBBFE), (0xBCA1, 0xBCFE), (0xBDA1, 0xBDFE), (0xBEA1, 0xBEFE), (0xBFA1, 0xBFFE), (0xC0A1, 0xC0FE), (0xC1A1, 0xC1FE), (0xC2A1, 0xC2FE), (0xC3A1, 0xC3FE), (0xC4A1, 0xC4FE), (0xC5A1, 0xC5FE), (0xC6A1, 0xC6FE), (0xC7A1, 0xC7FE), (0xC8A1, 0xC8FE), (0xC9A1, 0xC9FE), (0xCAA1, 0xCAFE), (0xCBA1, 0xCBFE), (0xCCA1, 0xCCFE), (0xCDA1, 0xCDFE), (0xCEA1, 0xCEFE), (0xCFA1, 0xCFFE), (0xD0A1, 0xD0FE), (0xD1A1, 0xD1FE), (0xD2A1, 0xD2FE), (0xD3A1, 0xD3FE), (0xD4A1, 0xD4FE), (0xD5A1, 0xD5FE), (0xD6A1, 0xD6FE), (0xD7A1, 0xD7FE), (0xD8A1, 0xD8FE), (0xD9A1, 0xD9FE), (0xDAA1, 0xDAFE), (0xDBA1, 0xDBFE), (0xDCA1, 0xDCFE), (0xDDA1, 0xDDFE), (0xDEA1, 0xDEFE), (0xDFA1, 0xDFFE), (0xE0A1, 0xE0FE), (0xE1A1, 0xE1FE), (0xE2A1, 0xE2FE), (0xE3A1, 0xE3FE), (0xE4A1, 0xE4FE), (0xE5A1, 0xE5FE), (0xE6A1, 0xE6FE), (0xE7A1, 0xE7FE), (0xE8A1, 0xE8FE), (0xE9A1, 0xE9FE), (0xEAA1, 0xEAFE), (0xEBA1, 0xEBFE), (0xECA1, 0xECFE), (0xEDA1, 0xEDFE), (0xEEA1, 0xEEFE), (0xEFA1, 0xEFFE), (0xF0A1, 0xF0FE), (0xF1A1, 0xF1FE), (0xF2A1, 0xF2FE), (0xF3A1, 0xF3FE), (0xF4A1, 0xF4FE), (0xF5A1, 0xF5FE), (0xF6A1, 0xF6FE), (0xF7A1, 0xF7FE) ];
126}
127
128pub fn province() -> Value {
130 let mut rng = rand::thread_rng();
131 Value::String(PROVINCES.choose(&mut rng).unwrap().clone())
132}
133
134pub fn rand_chinese(range: std::ops::Range<usize>) -> String {
136 let len = rand::Rng::gen_range(&mut rand::thread_rng(), range);
137 (0..len).map(|_| rand_hanzi()).collect()
138}
139
140pub fn random_street() -> Value {
142 let mut rng = rand::thread_rng();
143 let prefix_len = rng.gen_range(2..=4);
145 let suffix = STREET_SUFFIXES.choose(&mut rng).unwrap();
146
147 let prefix: String = (0..prefix_len).map(|_| rand_hanzi()).collect();
148
149 Value::String(format!("{}{}", prefix, suffix))
150}
151
152pub fn rand_hanzi() -> char {
154 let mut rng = rand::thread_rng();
155 let code = rng.gen_range(0x4E00..=0x9FA5);
163 std::char::from_u32(code).unwrap_or('汉')
164}
165
166pub fn generate_random_unicode() -> String {
172 let mut rng = rand::thread_rng();
173 let unicode_chars: Vec<char> = (0..1)
174 .map(|_| rng.gen_range(0x4E00..=0x9FFF) as u32)
175 .map(std::char::from_u32)
176 .flatten()
177 .collect();
178
179 unicode_chars.into_iter().collect()
180}
181
182pub fn is_uncommon_hanzi(c: char) -> bool {
183 let code = c as u32;
184
185 !GB2312_RANGES.iter().any(|&(start, end)| code >= start && code <= end)
186 || c.is_ascii_punctuation() }