br_code/
regex.rs

1use std::collections::HashMap;
2use regex::{Regex};
3
4/// 提取年月日
5pub fn get_date(text: &str) -> Vec<&str> {
6    let re = Regex::new(r#"[0-9]{4}-[0-9]{2}-[0-9]{2}"#).unwrap();
7    let dates: Vec<&str> = re.find_iter(text).map(|m| m.as_str()).collect();
8    dates
9}
10
11/// 获取纳税人识别号
12pub fn get_social_unified_number(text: &str) -> HashMap<&'static str, String> {
13    let re = Regex::new(r#"(?<province>[0-9]{1})(?<organization_type>[0-9]{1})(?<area>[0-9]{6})(?<code>[0-9A-Z]{9})(?<check>[0-9A-Z]{1})"#).unwrap();
14    let mut data = HashMap::new();
15    let _: Vec<_> = re.captures_iter(text).map(|caps| {
16        let rmdc;
17        let org_type;
18
19        let mut check_code = 0;
20        let index_a = [1, 3, 9, 27, 19, 26, 16, 17, 20, 29, 25, 13, 8, 24, 10, 30, 28];
21        let index_b = vec!["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "A", "B", "C", "D", "E", "F", "G", "H", "J", "K", "L", "M", "N", "P", "Q", "R", "T", "U", "W", "X", "Y"];
22        for i in 0..text.len() - 1 {
23            for (ii,_) in index_b.iter().enumerate() {
24                if index_b[ii] == &text[i..i + 1] {
25                    check_code += index_a[i] * ii as i32;
26                }
27            }
28        }
29
30        match caps.name("province").unwrap().as_str().to_string().parse::<i32>().unwrap() {
31            1 => {
32                rmdc = "机构";
33                match caps.name("organization_type").unwrap().as_str().to_string().parse::<i32>().unwrap() {
34                    1 => org_type = "机关单位",
35                    2 => org_type = "事业单位",
36                    _ => org_type = "中央编办直接管理机构编制的群众团体"
37                }
38            }
39            2 => {
40                rmdc = "民政";
41                match caps.name("organization_type").unwrap().as_str().to_string().parse::<i32>().unwrap() {
42                    1 => org_type = "社会团体",
43                    2 => org_type = "民办非企业单位",
44                    _ => org_type = "基金会"
45                }
46            }
47            9 => {
48                rmdc = "工商";
49                match caps.name("organization_type").unwrap().as_str().to_string().parse::<i32>().unwrap() {
50                    1 => org_type = "企业",
51                    2 => org_type = "个体工商户",
52                    _ => org_type = "农民专业合作社"
53                }
54            }
55            _ => {
56                rmdc = "其它";
57                org_type = "其它";
58            }
59        }
60
61        check_code = 31 - check_code % 31;
62
63        if check_code.to_string() == *caps.name("check").unwrap().as_str() {
64            data.insert("area", caps.name("area").unwrap().as_str().to_string());
65            data.insert("rmdc", rmdc.to_string());
66            data.insert("org_type", org_type.to_string());
67            data.insert("code", caps.name("code").unwrap().as_str().to_string());
68            data.insert("check", caps.name("check").unwrap().as_str().to_string());
69        }
70    }).collect();
71
72    data
73}
74
75
76/// 获取收货地址信息
77pub fn get_address(text: &str) -> HashMap<&'static str, String> {
78    let mut data = HashMap::new();
79    data.insert("phone", "".to_string());
80    data.insert("name", "".to_string());
81    data.insert("address", "".to_string());
82
83    let mut text = text.to_string().clone();
84    let re = Regex::new(r#"(?: |^)[0-9]{10,11}([(?= )]|$)"#).unwrap();
85    let binding = text.clone();
86    let res = re.find_iter(&binding).map(|x| { x.as_str() }).collect::<Vec<&str>>();
87    for item in res.iter() {
88        text = text.replace(&item.trim().to_string(), "").to_string();
89        data.insert("phone", item.trim().to_string());
90    }
91
92    let binding = text.clone();
93    let re = Regex::new(r#"((?: )|^)[\u4e00-\u9fa5]{2,4}([(?= )]|$)"#).unwrap();
94    let res = re.find_iter(&binding).map(|x| { x.as_str() }).collect::<Vec<&str>>();
95    for item in res.iter() {
96        text = text.replace(&item.trim().to_string(), "").to_string();
97        data.insert("name", item.trim().to_string());
98    }
99
100    let re = Regex::new(r#"(?<province>[^省]+省|.+自治区)(?<city>[^自治州]+自治州|[^市]+市|[^盟]+盟|[^地区]+地区|.+区划)(?<county>[^市]+市|[^县]+县|[^旗]+旗|.+区)?(?<town>[^街道]+街道|[^镇]+镇|.+乡)?(?<address>.*)?"#).unwrap();
101    let _: Vec<_> = re.captures_iter(&text).map(|caps| {
102        let province = caps.name("province").unwrap().as_str();
103        data.insert("province", province.trim().to_string());
104
105        let city = caps.name("city").unwrap().as_str();
106        data.insert("city", city.trim().to_string());
107
108        let county = caps.name("county").unwrap().as_str();
109        data.insert("county", county.trim().to_string());
110
111        match caps.name("town").is_none() {
112            true => {
113                data.insert("town", "".to_string());
114            }
115            false => {
116                data.insert("town", caps.name("town").unwrap().as_str().trim().to_string());
117            }
118        }
119
120        match caps.name("address").is_none() {
121            true => {
122                data.insert("address", "".to_string());
123            }
124            false => {
125                data.insert("address", caps.name("address").unwrap().as_str().trim().to_string());
126            }
127        }
128    }).collect();
129
130    data
131}