br-code 0.1.7

This is a code
Documentation
use std::collections::HashMap;
use regex::{Regex};

/// Extracts every `YYYY-MM-DD`-shaped substring from `text`.
///
/// Matching is purely syntactic: four ASCII digits, a dash, two digits, a
/// dash, two digits. Month and day ranges are not validated.
///
/// Returns the matches in order of appearance, borrowed from `text`.
pub fn get_date(text: &str) -> Vec<&str> {
    let pattern = Regex::new(r#"[0-9]{4}-[0-9]{2}-[0-9]{2}"#).unwrap();

    let mut found = Vec::new();
    for hit in pattern.find_iter(text) {
        found.push(hit.as_str());
    }
    found
}

/// Finds and validates an 18-character unified social credit code in `text`.
///
/// A candidate is eight ASCII digits followed by ten characters from `0-9A-Z`.
/// Its first 17 characters are run through a weighted mod-31 checksum (the
/// weights and 31-symbol alphabet below match the GB 32100-2015 scheme) and
/// the result is compared with the 18th character.
///
/// For a candidate that passes, the returned map contains:
/// - `"area"`: the six digits at positions 3-8,
/// - `"rmdc"`: a label derived from the first digit,
/// - `"org_type"`: a label derived from the first two digits,
/// - `"code"`: the nine characters at positions 9-17,
/// - `"check"`: the final check character.
///
/// The map is empty when no candidate validates. When several candidates
/// validate, the values of the last one win.
pub fn get_social_unified_number(text: &str) -> HashMap<&'static str, String> {
    // Per-position weights for the first 17 characters of the code.
    const WEIGHTS: [usize; 17] = [1, 3, 9, 27, 19, 26, 16, 17, 20, 29, 25, 13, 8, 24, 10, 30, 28];
    // Symbol -> value table: a symbol's value is its index (I, O, S, V, Z are excluded).
    const ALPHABET: &[u8; 31] = b"0123456789ABCDEFGHJKLMNPQRTUWXY";

    let re = Regex::new(r#"(?<province>[0-9]{1})(?<organization_type>[0-9]{1})(?<area>[0-9]{6})(?<code>[0-9A-Z]{9})(?<check>[0-9A-Z]{1})"#).unwrap();
    let mut data = HashMap::new();

    for caps in re.captures_iter(text) {
        // The checksum must be computed over the matched candidate, not over
        // the whole input. The pattern only admits ASCII, so the match is
        // exactly 18 bytes and splitting at byte 17 is always valid.
        let candidate = &caps[0];
        let (body, check) = candidate.split_at(WEIGHTS.len());

        // A letter outside the alphabet makes the candidate invalid (`None`).
        let weighted_sum = body
            .bytes()
            .zip(WEIGHTS.iter())
            .try_fold(0usize, |acc, (byte, weight)| {
                ALPHABET
                    .iter()
                    .position(|&symbol| symbol == byte)
                    .map(|value| acc + value * weight)
            });
        let expected_check = match weighted_sum {
            // `31 - sum % 31` yields 31 when the sum is a multiple of 31; the
            // outer `% 31` folds that case to value 0. The value is then
            // mapped back to its symbol so letter check characters compare
            // correctly.
            Some(sum) => {
                let modulus = ALPHABET.len();
                ALPHABET[(modulus - sum % modulus) % modulus]
            }
            None => continue,
        };
        if check.bytes().next() != Some(expected_check) {
            continue;
        }

        // Both groups are single ASCII digits, so they can be matched as text.
        let (rmdc, org_type) = match (&caps["province"], &caps["organization_type"]) {
            ("1", "1") => ("机构", "机关单位"),
            ("1", "2") => ("机构", "事业单位"),
            ("1", _) => ("机构", "中央编办直接管理机构编制的群众团体"),
            ("2", "1") => ("民政", "社会团体"),
            ("2", "2") => ("民政", "民办非企业单位"),
            ("2", _) => ("民政", "基金会"),
            ("9", "1") => ("工商", "企业"),
            ("9", "2") => ("工商", "个体工商户"),
            ("9", _) => ("工商", "农民专业合作社"),
            _ => ("其它", "其它"),
        };

        data.insert("area", caps["area"].to_string());
        data.insert("rmdc", rmdc.to_string());
        data.insert("org_type", org_type.to_string());
        data.insert("code", caps["code"].to_string());
        data.insert("check", check.to_string());
    }

    data
}


/// Splits a free-form shipping-address string into its parts.
///
/// Processing happens in three passes over `text`:
/// 1. A 10-11 digit run delimited by a space / start / end is taken as the
///    phone number and removed from the working text.
/// 2. A run of 2-4 CJK characters delimited the same way is taken as the
///    recipient name and removed from the working text.
/// 3. The remainder is matched against a province / city / county / town /
///    detail pattern.
///
/// The returned map always contains `"phone"`, `"name"` and `"address"`
/// (empty strings when not found). `"province"`, `"city"`, `"county"` and
/// `"town"` are present only when the region pattern matches; `"county"` and
/// `"town"` are empty strings when that part is absent from a matching input.
/// When a pass finds several matches, the last one wins.
pub fn get_address(text: &str) -> HashMap<&'static str, String> {
    let mut data = HashMap::new();
    data.insert("phone", String::new());
    data.insert("name", String::new());
    data.insert("address", String::new());

    let mut text = text.to_string();

    // NOTE(review): `[(?= )]` is a character class (one of `(`, `?`, `=`,
    // space, `)`), not a lookahead; the regex crate has no look-around. The
    // pattern is kept unchanged here to preserve existing matching behavior.
    let re = Regex::new(r#"(?: |^)[0-9]{10,11}([(?= )]|$)"#).unwrap();
    // Collect owned matches first so `text` can be rewritten afterwards.
    let phones: Vec<String> = re
        .find_iter(&text)
        .map(|m| m.as_str().trim().to_string())
        .collect();
    for phone in phones {
        text = text.replace(&phone, "");
        data.insert("phone", phone);
    }

    let re = Regex::new(r#"((?: )|^)[\u4e00-\u9fa5]{2,4}([(?= )]|$)"#).unwrap();
    let names: Vec<String> = re
        .find_iter(&text)
        .map(|m| m.as_str().trim().to_string())
        .collect();
    for name in names {
        text = text.replace(&name, "");
        data.insert("name", name);
    }

    let re = Regex::new(r#"(?<province>[^省]+省|.+自治区)(?<city>[^自治州]+自治州|[^市]+市|[^盟]+盟|[^地区]+地区|.+区划)(?<county>[^市]+市|[^县]+县|[^旗]+旗|.+区)?(?<town>[^街道]+街道|[^镇]+镇|.+乡)?(?<address>.*)?"#).unwrap();
    for caps in re.captures_iter(&text) {
        // `county`, `town` and `address` are optional groups: a group that did
        // not participate yields an empty string instead of panicking.
        let part = |group: &str| {
            caps.name(group)
                .map_or_else(String::new, |m| m.as_str().trim().to_string())
        };

        data.insert("province", part("province"));
        data.insert("city", part("city"));
        data.insert("county", part("county"));
        data.insert("town", part("town"));
        data.insert("address", part("address"));
    }

    data
}

#[cfg(test)]
mod tests {
    use crate::regex::{get_address, get_date, get_social_unified_number};

    /// A single embedded date is returned exactly once.
    #[test]
    fn extracts_dates() {
        assert_eq!(get_date(">>>1865-04-14|||"), vec!["1865-04-14"]);
        assert!(get_date("no dates here").is_empty());
    }

    /// A code whose check character is valid is split into its fields.
    #[test]
    fn parses_unified_social_credit_code() {
        let res = get_social_unified_number("91510100MAC86DTL05");
        assert_eq!(res.get("area").map(String::as_str), Some("510100"));
        assert_eq!(res.get("rmdc").map(String::as_str), Some("工商"));
        assert_eq!(res.get("org_type").map(String::as_str), Some("企业"));
        assert_eq!(res.get("code").map(String::as_str), Some("MAC86DTL0"));
        assert_eq!(res.get("check").map(String::as_str), Some("5"));
    }

    /// A province/city/district string without phone or name is split into regions.
    #[test]
    fn splits_address() {
        let res = get_address("中国四川省成都市高新区");
        assert_eq!(res.get("phone").map(String::as_str), Some(""));
        assert_eq!(res.get("name").map(String::as_str), Some(""));
        assert_eq!(res.get("province").map(String::as_str), Some("中国四川省"));
        assert_eq!(res.get("city").map(String::as_str), Some("成都市"));
        assert_eq!(res.get("county").map(String::as_str), Some("高新区"));
        assert_eq!(res.get("town").map(String::as_str), Some(""));
        assert_eq!(res.get("address").map(String::as_str), Some(""));
    }
}