//! geosite-rs 0.1.5
//!
//! A simple crate that parses the geosite.dat file format.
//! Documentation
use std::{
    collections::HashMap,
    net::{Ipv4Addr, Ipv6Addr},
};

use prost::Message;

/// parse dat data (which has protobuf format)
pub fn decode_geosite(buf: &[u8]) -> Result<GeoSiteList, prost::DecodeError> {
    GeoSiteList::decode(buf)
}

/// save to the dat format (which has protobuf format)
pub fn encode_geosite(sg: GeoSiteList) -> Vec<u8> {
    sg.encode_to_vec()
}

/// parse dat data (which has protobuf format)
pub fn decode_geoip(buf: &[u8]) -> Result<GeoIpList, prost::DecodeError> {
    GeoIpList::decode(buf)
}

/// save to the dat format (which has protobuf format)
pub fn encode_geoip(sg: GeoIpList) -> Vec<u8> {
    sg.encode_to_vec()
}

/// Interprets a 4-byte slice as a big-endian `u32`.
///
/// # Panics
///
/// Panics if `bytes` does not contain exactly 4 bytes.
fn vec_to_u32_be(bytes: &[u8]) -> u32 {
    assert!(bytes.len() == 4, "Input Vec<u8> must have exactly 4 bytes");
    // Most significant byte first: shift the accumulator up and append each byte.
    bytes
        .iter()
        .fold(0u32, |acc, &byte| (acc << 8) | u32::from(byte))
}
/// Interprets a 16-byte slice as a big-endian `u128`.
///
/// # Panics
///
/// Panics if `bytes` does not contain exactly 16 bytes.
fn vec_to_u128_be(bytes: &[u8]) -> u128 {
    assert!(
        bytes.len() == 16,
        "Input Vec<u8> must have exactly 16 bytes"
    );
    // Most significant byte first: shift the accumulator up and append each byte.
    bytes
        .iter()
        .fold(0u128, |acc, &byte| (acc << 8) | u128::from(byte))
}

/// Converts a geoip list into a rule map compatible with the one used by the
/// `clash_rules` crate.
///
/// The returned map is keyed by rule kind: `"IP-CIDR"` for 4-byte (IPv4)
/// addresses and `"IP-CIDR6"` for 16-byte (IPv6) addresses. Each value is a
/// list of `[cidr, target]` pairs, where `cidr` is formatted as
/// `"<address>/<prefix>"`.
///
/// `country_target_map` maps an entry's `country_code` to the target string
/// emitted for every CIDR of that entry. Entries whose country code has no
/// mapping are skipped instead of panicking, so callers only need to list the
/// countries they care about.
///
/// CIDRs whose address is neither 4 nor 16 bytes long are ignored, and the
/// `reverse_match` flag of an entry is not consulted.
pub fn geoip_to_hashmap(
    geoip_list: &GeoIpList,
    country_target_map: HashMap<String, String>,
) -> HashMap<String, Vec<Vec<String>>> {
    let mut map: HashMap<String, Vec<Vec<String>>> = HashMap::new();
    for ipg in &geoip_list.entry {
        // Resolve the target once per entry; it is the same for all its CIDRs.
        let target = match country_target_map.get(&ipg.country_code) {
            Some(target) => target,
            None => continue, // no target configured for this country
        };
        for cidr in &ipg.cidr {
            // The address family is determined by the byte length of the address.
            let (key, addr) = match cidr.ip.len() {
                4 => (
                    "IP-CIDR",
                    Ipv4Addr::from(vec_to_u32_be(&cidr.ip)).to_string(),
                ),
                16 => (
                    "IP-CIDR6",
                    Ipv6Addr::from(vec_to_u128_be(&cidr.ip)).to_string(),
                ),
                _ => continue, // malformed address length
            };
            let rule = vec![format!("{addr}/{}", cidr.prefix), target.clone()];
            map.entry(key.to_string()).or_default().push(rule);
        }
    }
    map
}

/// Converts a geosite list into a rule map compatible with the one used by the
/// `clash_rules` crate.
///
/// The returned map is keyed by rule kind: `"DOMAIN-KEYWORD"`,
/// `"DOMAIN-REGEX"`, `"DOMAIN-SUFFIX"` or `"DOMAIN"`. Each value is a list of
/// `[domain_value, target]` pairs.
///
/// `group_target_map` maps a group's `country_code` to the target string
/// emitted for every domain of that group. Groups with no mapping are skipped
/// instead of panicking, so callers only need to list the groups they care
/// about.
///
/// Domains whose type is not one of the four known values are ignored, and
/// domain attributes are not consulted.
pub fn geosite_to_hashmap(
    site_group_list: &GeoSiteList,
    group_target_map: HashMap<String, String>,
) -> HashMap<String, Vec<Vec<String>>> {
    let mut map: HashMap<String, Vec<Vec<String>>> = HashMap::new();

    for group in &site_group_list.entry {
        // Resolve the target once per group; it is the same for all its domains.
        let target = match group_target_map.get(&group.country_code) {
            Some(target) => target,
            None => continue, // no target configured for this group
        };

        for domain in &group.domain {
            // Numeric values follow the `domain::Type` enum:
            // Plain = 0, Regex = 1, Domain = 2, Full = 3.
            let key = match domain.r#type {
                0 => "DOMAIN-KEYWORD", // Plain: value is used as is
                1 => "DOMAIN-REGEX",   // Regex: value is a regular expression
                2 => "DOMAIN-SUFFIX",  // Domain: value is a root domain
                3 => "DOMAIN",         // Full: value is a full domain
                _ => continue,         // skip unknown types
            };

            let rule = vec![domain.value.clone(), target.clone()];
            map.entry(key.to_string()).or_default().push(rule);
        }
    }

    map
}
//include!(concat!(env!("OUT_DIR"), "/_.rs"));
//
// This file is @generated by prost-build.
/// Domain for routing decision.
///
/// One matching rule inside a [`GeoSite`] group: a matching type, the value
/// to match, and optional attributes.
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct Domain {
    /// Domain matching type.
    ///
    /// Stored as the raw `i32` value of [`domain::Type`].
    #[prost(enumeration = "domain::Type", tag = "1")]
    pub r#type: i32,
    /// Domain value.
    #[prost(string, tag = "2")]
    pub value: ::prost::alloc::string::String,
    /// Attributes of this domain. May be used for filtering.
    #[prost(message, repeated, tag = "3")]
    pub attribute: ::prost::alloc::vec::Vec<domain::Attribute>,
}
/// Nested message and enum types in `Domain`.
pub mod domain {
    /// A keyed attribute attached to a `Domain`, carrying an optional typed value.
    #[derive(Clone, PartialEq, ::prost::Message)]
    pub struct Attribute {
        /// Name of the attribute.
        #[prost(string, tag = "1")]
        pub key: ::prost::alloc::string::String,
        /// Value of the attribute, either a boolean or an integer; `None` when
        /// neither variant is set.
        #[prost(oneof = "attribute::TypedValue", tags = "2, 3")]
        pub typed_value: ::core::option::Option<attribute::TypedValue>,
    }
    /// Nested message and enum types in `Attribute`.
    pub mod attribute {
        /// The value carried by an `Attribute`: exactly one of the variants below.
        #[derive(Clone, Copy, PartialEq, ::prost::Oneof)]
        pub enum TypedValue {
            /// Boolean attribute value.
            #[prost(bool, tag = "2")]
            BoolValue(bool),
            /// 64-bit signed integer attribute value.
            #[prost(int64, tag = "3")]
            IntValue(i64),
        }
    }
    /// Type of domain value.
    ///
    /// The discriminants are the integer values stored in `Domain::type`.
    #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)]
    #[repr(i32)]
    pub enum Type {
        /// The value is used as is.
        Plain = 0,
        /// The value is used as a regular expression.
        Regex = 1,
        /// The value is a root domain.
        Domain = 2,
        /// The value is a domain.
        Full = 3,
    }
    impl Type {
        /// String value of the enum field names used in the ProtoBuf definition.
        ///
        /// The values are not transformed in any way and thus are considered stable
        /// (if the ProtoBuf definition does not change) and safe for programmatic use.
        pub fn as_str_name(&self) -> &'static str {
            match self {
                Self::Plain => "Plain",
                Self::Regex => "Regex",
                Self::Domain => "Domain",
                Self::Full => "Full",
            }
        }
        /// Creates an enum from field names used in the ProtoBuf definition.
        ///
        /// Returns `None` when `value` is not one of the four variant names
        /// (the comparison is case-sensitive).
        pub fn from_str_name(value: &str) -> ::core::option::Option<Self> {
            match value {
                "Plain" => Some(Self::Plain),
                "Regex" => Some(Self::Regex),
                "Domain" => Some(Self::Domain),
                "Full" => Some(Self::Full),
                _ => None,
            }
        }
    }
}
/// A named group of domain rules.
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct GeoSite {
    /// Identifier of this group; this is the key `geosite_to_hashmap` looks up
    /// in its `group_target_map` argument.
    #[prost(string, tag = "1")]
    pub country_code: ::prost::alloc::string::String,
    /// Domain rules belonging to this group.
    #[prost(message, repeated, tag = "2")]
    pub domain: ::prost::alloc::vec::Vec<Domain>,
}

/// Top-level message of a geosite `.dat` file: the list of all site groups.
///
/// This is the type produced by `decode_geosite` and consumed by
/// `encode_geosite`.
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct GeoSiteList {
    /// All site groups contained in the file.
    #[prost(message, repeated, tag = "1")]
    pub entry: ::prost::alloc::vec::Vec<GeoSite>,
}
/// IP for routing decision, in CIDR form.
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct Cidr {
    /// IP address, should be either 4 or 16 bytes.
    ///
    /// `geoip_to_hashmap` reads the bytes in big-endian order and ignores
    /// addresses of any other length.
    #[prost(bytes = "vec", tag = "1")]
    pub ip: ::prost::alloc::vec::Vec<u8>,
    /// Number of leading ones in the network mask.
    #[prost(uint32, tag = "2")]
    pub prefix: u32,
}
/// A named group of CIDR ranges.
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct GeoIp {
    /// Identifier of this group; this is the key `geoip_to_hashmap` looks up
    /// in its `country_target_map` argument.
    #[prost(string, tag = "1")]
    pub country_code: ::prost::alloc::string::String,
    /// CIDR ranges belonging to this group.
    #[prost(message, repeated, tag = "2")]
    pub cidr: ::prost::alloc::vec::Vec<Cidr>,
    /// NOTE(review): presumably inverts the match (addresses outside the listed
    /// ranges match) — confirm against the upstream proto definition.
    /// `geoip_to_hashmap` does not read this flag.
    #[prost(bool, tag = "3")]
    pub reverse_match: bool,
}
/// Top-level message of a geoip `.dat` file: the list of all IP groups.
///
/// This is the type produced by `decode_geoip` and consumed by
/// `encode_geoip`.
#[derive(Clone, PartialEq, ::prost::Message)]
pub struct GeoIpList {
    /// All IP groups contained in the file.
    #[prost(message, repeated, tag = "1")]
    pub entry: ::prost::alloc::vec::Vec<GeoIp>,
}