1use crate::error::ParseError;
2use crate::config::{Configuration, CountryInfo};
3use crate::ParserConfig;
4
5pub fn parse_country_code(text: &str) -> Result<CountryInfo, ParseError> {
7 parse_country_code_with_config(text, &ParserConfig::default())
8}
9
10pub fn parse_country_code_with_config(
12 text: &str,
13 _config: &ParserConfig,
14) -> Result<CountryInfo, ParseError> {
15 if text.trim().is_empty() {
17 return Err(ParseError::invalid_input("输入文本为空"));
18 }
19
20 if text.len() > 1024 {
21 return Err(ParseError::invalid_input("输入文本过长"));
22 }
23
24 let config_data = Configuration::load()
26 .map_err(|e| ParseError::config_error(&format!("配置加载失败: {}", e)))?;
27
28 match parse_abbreviations(text, &config_data) {
33 Ok(country_info) => return Ok(country_info),
34 Err(_) => {},
35 }
36
37 match parse_traditional_chinese_names(text, &config_data) {
38 Ok(country_info) => return Ok(country_info),
39 Err(_) => {},
40 }
41
42 match parse_simplified_chinese_names(text, &config_data) {
43 Ok(country_info) => return Ok(country_info),
44 Err(_) => {},
45 }
46
47 match parse_english_names(text, &config_data) {
48 Ok(country_info) => return Ok(country_info),
49 Err(_) => {},
50 }
51
52 let chars: Vec<char> = text.chars().collect();
55 for i in 0..chars.len().saturating_sub(2) {
56 if chars[i].is_alphabetic() && chars[i+1].is_alphabetic() && chars[i+2].is_alphabetic() {
57 let slice: String = chars[i..i+3].iter().collect();
58 let slice_upper = slice.to_uppercase();
59
60 for country in config_data.get_countries() {
61 if country.alpha3 == slice_upper {
62 let mut valid = true;
64 if i > 0 {
65 let prev_char = chars[i-1];
66 if !is_boundary_char(prev_char) && !prev_char.is_numeric() {
67 valid = false;
68 }
69 }
70
71 if valid {
72 return Ok(country.clone());
73 }
74 }
75 }
76 }
77 }
78
79 for i in 0..chars.len().saturating_sub(1) {
81 if chars[i].is_alphabetic() && chars[i+1].is_alphabetic() {
82 let slice: String = chars[i..i+2].iter().collect();
83 let slice_upper = slice.to_uppercase();
84
85 for country in config_data.get_countries() {
86 if country.alpha2 == slice_upper {
87 let mut valid = true;
89 if i > 0 {
90 let prev_char = chars[i-1];
91 if !is_boundary_char(prev_char) && !prev_char.is_numeric() {
92 valid = false;
93 }
94 }
95
96 if valid {
97 return Ok(country.clone());
98 }
99 }
100 }
101 }
102 }
103
104 for country in config_data.get_countries() {
106 if text.to_uppercase().contains(&country.alpha3) {
108 let pattern = country.alpha3.to_string();
109 if let Some(pos) = text.to_uppercase().find(&pattern) {
110 let start = pos;
111 let end = pos + pattern.len();
112
113 let prev_valid = start == 0 || is_boundary_char(text.chars().nth(start-1).unwrap_or(' '));
115 let next_valid = end >= text.len() || is_boundary_char(text.chars().nth(end).unwrap_or(' ')) || text.chars().nth(end).unwrap_or(' ').is_numeric();
116
117 if prev_valid && next_valid {
118 return Ok(country.clone());
119 }
120 }
121 }
122
123 if text.to_uppercase().contains(&country.alpha2) {
125 let pattern = country.alpha2.to_string();
126 if let Some(pos) = text.to_uppercase().find(&pattern) {
127 let start = pos;
128 let end = pos + pattern.len();
129
130 let prev_valid = start == 0 || is_boundary_char(text.chars().nth(start-1).unwrap_or(' '));
132 let next_valid = end >= text.len() || is_boundary_char(text.chars().nth(end).unwrap_or(' ')) || text.chars().nth(end).unwrap_or(' ').is_numeric();
133
134 if prev_valid && next_valid {
135 return Ok(country.clone());
136 }
137 }
138 }
139 }
140
141 Err(ParseError::not_found(text))
142}
143
144fn parse_abbreviations(
146 text: &str,
147 config_data: &Configuration,
148) -> Result<CountryInfo, ParseError> {
149 let processed_text = text.to_lowercase();
150
151 for country in config_data.get_countries() {
152 for abbr in &country.abbreviations {
153 if processed_text.contains(&abbr.to_lowercase()) {
154 return Ok(country.clone());
155 }
156 }
157 }
158
159 Err(ParseError::not_found(text))
160}
161
162fn parse_traditional_chinese_names(
164 text: &str,
165 config_data: &Configuration,
166) -> Result<CountryInfo, ParseError> {
167 for country in config_data.get_countries() {
168 if text.contains(&country.name_zh_tw) {
169 return Ok(country.clone());
170 }
171 }
172
173 Err(ParseError::not_found(text))
174}
175
176fn parse_simplified_chinese_names(
178 text: &str,
179 config_data: &Configuration,
180) -> Result<CountryInfo, ParseError> {
181 for country in config_data.get_countries() {
182 if text.contains(&country.name_zh_cn) {
183 return Ok(country.clone());
184 }
185 }
186
187 Err(ParseError::not_found(text))
188}
189
190fn parse_english_names(
192 text: &str,
193 config_data: &Configuration,
194) -> Result<CountryInfo, ParseError> {
195 let processed_text = text.to_lowercase();
196
197 for country in config_data.get_countries() {
198 if processed_text.contains(&country.name_en.to_lowercase()) {
199 return Ok(country.clone());
200 }
201 }
202
203 Err(ParseError::not_found(text))
204}
205
206
207
/// Returns `true` when `c` may delimit a country code: any Unicode whitespace
/// character, or one of `@`, `#`, `[`, `]`, `【`, `】`.
fn is_boundary_char(c: char) -> bool {
    // Space, tab and newline are already covered by `is_whitespace`.
    if c.is_whitespace() {
        return true;
    }
    matches!(c, '@' | '#' | '[' | ']' | '【' | '】')
}