use std::collections::HashMap;
use regex::{Regex};
/// Collects every substring of `text` shaped like `dddd-dd-dd` (four digits,
/// two digits, two digits, separated by hyphens), in order of appearance.
///
/// Only the shape is checked; the digits are not validated as a real calendar
/// date. The returned slices borrow from `text`.
pub fn get_date(text: &str) -> Vec<&str> {
    let date_pattern = Regex::new(r#"[0-9]{4}-[0-9]{2}-[0-9]{2}"#).unwrap();
    let mut found = Vec::new();
    for hit in date_pattern.find_iter(text) {
        found.push(hit.as_str());
    }
    found
}
/// Searches `text` for 18-character unified social credit codes and returns
/// the decoded fields of the last candidate whose check character is valid.
///
/// A candidate consists of one digit for the registration department, one
/// digit for the organization type, six digits for the area, nine characters
/// (`0-9A-Z`) for the subject code and one check character.
///
/// The check character is verified over the matched candidate itself, so a
/// code embedded in surrounding text is handled. Candidates containing a
/// character outside the code alphabet are rejected.
///
/// # Returns
///
/// A map with the keys `"area"`, `"rmdc"`, `"org_type"`, `"code"` and
/// `"check"`. The map is empty when no candidate passes the check. When
/// several candidates pass, later ones overwrite earlier ones.
pub fn get_social_unified_number(text: &str) -> HashMap<&'static str, String> {
    // Weight of each of the first 17 positions: 3^i mod 31.
    const WEIGHTS: [u32; 17] = [1, 3, 9, 27, 19, 26, 16, 17, 20, 29, 25, 13, 8, 24, 10, 30, 28];
    // Code alphabet; the numeric value of a character is its index here.
    // The letters I, O, S, V and Z are not part of it.
    const ALPHABET: &[u8] = b"0123456789ABCDEFGHJKLMNPQRTUWXY";

    // The group names `province` / `organization_type` are historical: the first
    // digit is the registration department, the second the organization type.
    let re = Regex::new(r#"(?<province>[0-9]{1})(?<organization_type>[0-9]{1})(?<area>[0-9]{6})(?<code>[0-9A-Z]{9})(?<check>[0-9A-Z]{1})"#).unwrap();
    let mut data = HashMap::new();

    for caps in re.captures_iter(text) {
        // The pattern admits ASCII only, so a match is exactly 18 bytes and
        // byte positions equal character positions.
        let candidate = caps[0].as_bytes();

        // Weighted sum over the first 17 characters; `None` as soon as a
        // character is not in the alphabet.
        let weighted_sum = candidate[..17]
            .iter()
            .zip(WEIGHTS.iter())
            .try_fold(0u32, |acc, (&ch, &weight)| {
                ALPHABET
                    .iter()
                    .position(|&sym| sym == ch)
                    .map(|value| acc + weight * value as u32)
            });

        // The check value is `31 - sum mod 31`, with 31 folded back to 0, and
        // is written using the same alphabet as the rest of the code.
        let expected = match weighted_sum {
            Some(sum) => ALPHABET[((31 - sum % 31) % 31) as usize],
            None => continue,
        };
        if candidate[17] != expected {
            continue;
        }

        let org = &caps["organization_type"];
        // Department digits per GB 32100-2015: 1 = institutional establishment,
        // 5 = civil affairs, 9 = industry and commerce.
        let (rmdc, org_type) = match &caps["province"] {
            "1" => (
                "机构",
                match org {
                    "1" => "机关单位",
                    "2" => "事业单位",
                    _ => "中央编办直接管理机构编制的群众团体",
                },
            ),
            "5" => (
                "民政",
                match org {
                    "1" => "社会团体",
                    "2" => "民办非企业单位",
                    _ => "基金会",
                },
            ),
            "9" => (
                "工商",
                match org {
                    "1" => "企业",
                    "2" => "个体工商户",
                    _ => "农民专业合作社",
                },
            ),
            _ => ("其它", "其它"),
        };

        data.insert("area", caps["area"].to_string());
        data.insert("rmdc", rmdc.to_string());
        data.insert("org_type", org_type.to_string());
        data.insert("code", caps["code"].to_string());
        data.insert("check", caps["check"].to_string());
    }
    data
}
/// Splits a space-separated line of text into a phone number, a name and the
/// administrative parts of a Chinese address.
///
/// Processing happens in three passes over a working copy of `text`:
///
/// 1. A run of 10–11 digits delimited by spaces (or the start/end of the
///    input) is taken as the phone number and removed from the working copy.
/// 2. A run of 2–4 CJK characters (`\u4e00`–`\u9fa5`) delimited the same way
///    is taken as the name and removed. Any standalone token of that shape
///    qualifies, whether or not it is really a name.
/// 3. The address pattern is applied to what is left.
///
/// # Returns
///
/// A map that always contains `"phone"`, `"name"` and `"address"` (empty
/// strings when nothing was found). When the address pattern matches, it also
/// contains `"province"`, `"city"`, `"county"` and `"town"`; the optional
/// parts that did not match are empty strings. When a pass matches more than
/// once, the last match wins.
pub fn get_address(text: &str) -> HashMap<&'static str, String> {
    let mut data = HashMap::new();
    data.insert("phone", String::new());
    data.insert("name", String::new());
    data.insert("address", String::new());
    let mut text = text.to_string();

    // The regex crate has no look-ahead, so the delimiting space is consumed by
    // the match and trimmed off afterwards.
    let phone_re = Regex::new(r#"(?: |^)[0-9]{10,11}(?: |$)"#).unwrap();
    // Matches are collected up front because `text` is rewritten in the loop.
    let phones: Vec<String> = phone_re
        .find_iter(&text)
        .map(|m| m.as_str().trim().to_string())
        .collect();
    for phone in phones {
        text = text.replace(&phone, "");
        data.insert("phone", phone);
    }

    let name_re = Regex::new(r#"(?: |^)[\u4e00-\u9fa5]{2,4}(?: |$)"#).unwrap();
    let names: Vec<String> = name_re
        .find_iter(&text)
        .map(|m| m.as_str().trim().to_string())
        .collect();
    for name in names {
        text = text.replace(&name, "");
        data.insert("name", name);
    }

    let address_re = Regex::new(r#"(?<province>[^省]+省|.+自治区)(?<city>[^自治州]+自治州|[^市]+市|[^盟]+盟|[^地区]+地区|.+区划)(?<county>[^市]+市|[^县]+县|[^旗]+旗|.+区)?(?<town>[^街道]+街道|[^镇]+镇|.+乡)?(?<address>.*)?"#).unwrap();
    for caps in address_re.captures_iter(&text) {
        // `province` and `city` are mandatory groups, so they are present in
        // every match.
        data.insert("province", caps["province"].trim().to_string());
        data.insert("city", caps["city"].trim().to_string());
        // The remaining groups are optional; a group that did not take part in
        // the match yields an empty string.
        for key in ["county", "town", "address"] {
            let value = caps.name(key).map_or("", |m| m.as_str().trim());
            data.insert(key, value.to_string());
        }
    }
    data
}
#[cfg(test)]
mod tests {
    use crate::regex::{get_address, get_date, get_social_unified_number};

    /// A date surrounded by other characters is returned as the only match.
    #[test]
    fn date_is_extracted() {
        assert_eq!(vec!["1865-04-14"], get_date(">>>1865-04-14|||"));
    }

    /// A code with a valid check character is decoded into its five fields.
    #[test]
    fn social_unified_number_is_decoded() {
        let res = get_social_unified_number("91510100MAC86DTL05");
        assert_eq!(5, res.len());
        assert_eq!(res["area"], "510100");
        assert_eq!(res["rmdc"], "工商");
        assert_eq!(res["org_type"], "企业");
        assert_eq!(res["code"], "MAC86DTL0");
        assert_eq!(res["check"], "5");
    }

    /// An address without phone or name is split into its administrative parts.
    #[test]
    fn address_is_split() {
        let res = get_address("中国四川省成都市高新区");
        assert_eq!(res["phone"], "");
        assert_eq!(res["name"], "");
        assert_eq!(res["province"], "中国四川省");
        assert_eq!(res["city"], "成都市");
        assert_eq!(res["county"], "高新区");
        assert_eq!(res["town"], "");
        assert_eq!(res["address"], "");
    }
}