1use std::collections::HashMap;
2use crate::error::ParseError;
3use crate::config::{Configuration, CountryInfo};
4use crate::ParserConfig;
5
6pub fn parse_country_code(text: &str) -> Result<CountryInfo, ParseError> {
8 parse_country_code_with_config(text, &ParserConfig::default())
9}
10
11pub fn parse_country_code_with_config(
13 text: &str,
14 config: &ParserConfig,
15) -> Result<CountryInfo, ParseError> {
16 if text.trim().is_empty() {
18 return Err(ParseError::invalid_input("输入文本为空"));
19 }
20
21 if text.len() > 1024 {
22 return Err(ParseError::invalid_input("输入文本过长"));
23 }
24
25 let config_data = Configuration::load()
27 .map_err(|e| ParseError::config_error(&format!("配置加载失败: {}", e)))?;
28
29 let country_mapping = config_data.create_country_mapping();
31
32 let processed_text = if config.case_sensitive {
34 text.to_string()
35 } else {
36 text.to_lowercase()
37 };
38
39 match parse_iso_codes(&processed_text, &country_mapping, config) {
41 Ok(country_info) => Ok(country_info),
42 Err(_) => match parse_chinese_names(&processed_text, &country_mapping, config) {
43 Ok(country_info) => Ok(country_info),
44 Err(_) => parse_pattern_matching(&processed_text, &country_mapping, config),
45 },
46 }
47}
48
49fn parse_iso_codes(
51 text: &str,
52 mapping: &HashMap<String, &CountryInfo>,
53 _config: &ParserConfig,
54) -> Result<CountryInfo, ParseError> {
55 let chars: Vec<char> = text.chars().collect();
56
57 for i in 0..chars.len().saturating_sub(1) {
59 if chars[i].is_alphabetic() && chars[i+1].is_alphabetic() {
60 let slice: String = chars[i..i+2].iter().collect();
61 if let Some(country_info) = mapping.get(&slice.to_uppercase()) {
62 if is_valid_iso_code_position_chars(&chars, i, 2) {
63 return Ok((*country_info).clone());
64 }
65 }
66 }
67 }
68
69 for i in 0..chars.len().saturating_sub(2) {
71 if chars[i].is_alphabetic() && chars[i+1].is_alphabetic() && chars[i+2].is_alphabetic() {
72 let slice: String = chars[i..i+3].iter().collect();
73 if let Some(country_info) = mapping.get(&slice.to_uppercase()) {
74 if is_valid_iso_code_position_chars(&chars, i, 3) {
75 return Ok((*country_info).clone());
76 }
77 }
78 }
79 }
80
81 Err(ParseError::not_found(text))
82}
83
84fn parse_chinese_names(
86 text: &str,
87 mapping: &HashMap<String, &CountryInfo>,
88 _config: &ParserConfig,
89) -> Result<CountryInfo, ParseError> {
90 for (name, country_info) in mapping {
92 if name.chars().any(|c| c.is_alphabetic() && c as u32 > 255) && text.contains(name) {
94 return Ok((*country_info).clone());
95 }
96 }
97
98 Err(ParseError::not_found(text))
99}
100
101
102
103fn is_valid_iso_code_position_chars(chars: &[char], start: usize, length: usize) -> bool {
105 if start > 0 {
107 let prev_char = chars[start - 1];
108 if !is_boundary_char(prev_char) {
109 return false;
110 }
111 }
112
113 if start + length < chars.len() {
115 let next_char = chars[start + length];
116 if !is_boundary_char(next_char) {
117 return false;
118 }
119 }
120
121 true
122}
123
124fn parse_pattern_matching(
126 text: &str,
127 mapping: &HashMap<String, &CountryInfo>,
128 config: &ParserConfig,
129) -> Result<CountryInfo, ParseError> {
130 if config.fuzzy_match {
132 let mut candidates = Vec::new();
133 let text_lower = text.to_lowercase();
134
135 for (name, country_info) in mapping {
136 let name_lower = name.to_lowercase();
137
138 if text_lower.contains(&name_lower) && name.len() == 2 && name.chars().all(char::is_uppercase) {
140 candidates.push(((**country_info).clone(), 2)); }
142 else if text_lower.contains(&name_lower) && name.len() == 3 && name.chars().all(char::is_uppercase) {
144 candidates.push(((**country_info).clone(), 1));
145 }
146 else if text.contains(name) && name.chars().any(|c| c.is_alphabetic() && c as u32 > 255) {
148 candidates.push(((**country_info).clone(), 3));
149 }
150 else if text_lower.contains(&name_lower) && name_lower.chars().all(|c| c.is_alphabetic() || c.is_whitespace()) {
152 candidates.push(((**country_info).clone(), 4));
153 }
154 }
155
156 if !candidates.is_empty() {
157 candidates.sort_by_key(|&(_, priority)| priority);
159 return Ok(candidates[0].0.clone());
160 }
161 }
162
163 Err(ParseError::not_found(text))
164}
165
/// Returns `true` when `c` may delimit a country code: any Unicode whitespace
/// character (which already includes space, tab and newline), or one of
/// `@`, `#`, `[`, `]`, `【`, `】`.
fn is_boundary_char(c: char) -> bool {
    c.is_whitespace() || matches!(c, '@' | '#' | '[' | ']' | '【' | '】')
}