1use std::collections::HashMap;
2use regex::{Regex};
3
4pub fn get_date(text: &str) -> Vec<&str> {
6 let re = Regex::new(r#"[0-9]{4}-[0-9]{2}-[0-9]{2}"#).unwrap();
7 let dates: Vec<&str> = re.find_iter(text).map(|m| m.as_str()).collect();
8 dates
9}
10
11pub fn get_social_unified_number(text: &str) -> HashMap<&'static str, String> {
13 let re = Regex::new(r#"(?<province>[0-9]{1})(?<organization_type>[0-9]{1})(?<area>[0-9]{6})(?<code>[0-9A-Z]{9})(?<check>[0-9A-Z]{1})"#).unwrap();
14 let mut data = HashMap::new();
15 let _: Vec<_> = re.captures_iter(text).map(|caps| {
16 let rmdc;
17 let org_type;
18
19 let mut check_code = 0;
20 let index_a = [1, 3, 9, 27, 19, 26, 16, 17, 20, 29, 25, 13, 8, 24, 10, 30, 28];
21 let index_b = vec!["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "A", "B", "C", "D", "E", "F", "G", "H", "J", "K", "L", "M", "N", "P", "Q", "R", "T", "U", "W", "X", "Y"];
22 for i in 0..text.len() - 1 {
23 for (ii,_) in index_b.iter().enumerate() {
24 if index_b[ii] == &text[i..i + 1] {
25 check_code += index_a[i] * ii as i32;
26 }
27 }
28 }
29
30 match caps.name("province").unwrap().as_str().to_string().parse::<i32>().unwrap() {
31 1 => {
32 rmdc = "机构";
33 match caps.name("organization_type").unwrap().as_str().to_string().parse::<i32>().unwrap() {
34 1 => org_type = "机关单位",
35 2 => org_type = "事业单位",
36 _ => org_type = "中央编办直接管理机构编制的群众团体"
37 }
38 }
39 2 => {
40 rmdc = "民政";
41 match caps.name("organization_type").unwrap().as_str().to_string().parse::<i32>().unwrap() {
42 1 => org_type = "社会团体",
43 2 => org_type = "民办非企业单位",
44 _ => org_type = "基金会"
45 }
46 }
47 9 => {
48 rmdc = "工商";
49 match caps.name("organization_type").unwrap().as_str().to_string().parse::<i32>().unwrap() {
50 1 => org_type = "企业",
51 2 => org_type = "个体工商户",
52 _ => org_type = "农民专业合作社"
53 }
54 }
55 _ => {
56 rmdc = "其它";
57 org_type = "其它";
58 }
59 }
60
61 check_code = 31 - check_code % 31;
62
63 if check_code.to_string() == *caps.name("check").unwrap().as_str() {
64 data.insert("area", caps.name("area").unwrap().as_str().to_string());
65 data.insert("rmdc", rmdc.to_string());
66 data.insert("org_type", org_type.to_string());
67 data.insert("code", caps.name("code").unwrap().as_str().to_string());
68 data.insert("check", caps.name("check").unwrap().as_str().to_string());
69 }
70 }).collect();
71
72 data
73}
74
75
76pub fn get_address(text: &str) -> HashMap<&'static str, String> {
78 let mut data = HashMap::new();
79 data.insert("phone", "".to_string());
80 data.insert("name", "".to_string());
81 data.insert("address", "".to_string());
82
83 let mut text = text.to_string().clone();
84 let re = Regex::new(r#"(?: |^)[0-9]{10,11}([(?= )]|$)"#).unwrap();
85 let binding = text.clone();
86 let res = re.find_iter(&binding).map(|x| { x.as_str() }).collect::<Vec<&str>>();
87 for item in res.iter() {
88 text = text.replace(&item.trim().to_string(), "").to_string();
89 data.insert("phone", item.trim().to_string());
90 }
91
92 let binding = text.clone();
93 let re = Regex::new(r#"((?: )|^)[\u4e00-\u9fa5]{2,4}([(?= )]|$)"#).unwrap();
94 let res = re.find_iter(&binding).map(|x| { x.as_str() }).collect::<Vec<&str>>();
95 for item in res.iter() {
96 text = text.replace(&item.trim().to_string(), "").to_string();
97 data.insert("name", item.trim().to_string());
98 }
99
100 let re = Regex::new(r#"(?<province>[^省]+省|.+自治区)(?<city>[^自治州]+自治州|[^市]+市|[^盟]+盟|[^地区]+地区|.+区划)(?<county>[^市]+市|[^县]+县|[^旗]+旗|.+区)?(?<town>[^街道]+街道|[^镇]+镇|.+乡)?(?<address>.*)?"#).unwrap();
101 let _: Vec<_> = re.captures_iter(&text).map(|caps| {
102 let province = caps.name("province").unwrap().as_str();
103 data.insert("province", province.trim().to_string());
104
105 let city = caps.name("city").unwrap().as_str();
106 data.insert("city", city.trim().to_string());
107
108 let county = caps.name("county").unwrap().as_str();
109 data.insert("county", county.trim().to_string());
110
111 match caps.name("town").is_none() {
112 true => {
113 data.insert("town", "".to_string());
114 }
115 false => {
116 data.insert("town", caps.name("town").unwrap().as_str().trim().to_string());
117 }
118 }
119
120 match caps.name("address").is_none() {
121 true => {
122 data.insert("address", "".to_string());
123 }
124 false => {
125 data.insert("address", caps.name("address").unwrap().as_str().trim().to_string());
126 }
127 }
128 }).collect();
129
130 data
131}