internet-util 0.1.0

An internet utility.
Documentation
use std::io::Read;

use encoding_rs::GBK;
use flate2::read::GzDecoder;
use reqwest::{Client, header};
use scraper::{Html, Selector};

use crate::{proxy_object::ProxyObj, ip_util};

/// Collects proxies from `www.ip3366.net`, page by page, until at least
/// `proxy_size` entries have been gathered.
///
/// Pages are requested starting at page 1. Fetching stops as soon as one of
/// the following holds:
///
/// * the accumulated list contains `proxy_size` or more entries,
/// * a page yields no proxies (treated as "no more data", so the loop cannot
///   spin forever on an exhausted or changed site),
/// * the `u8` page counter accepted by [`get_proxy_by_page`] is exhausted.
///
/// The returned list may therefore be shorter than `proxy_size` when the site
/// does not offer enough entries, and may be longer because whole pages are
/// appended. With `proxy_size == 0` no request is made.
///
/// Because of `#[tokio::main]` this is a blocking function that starts its
/// own Tokio runtime; it must not be called from inside an already running
/// Tokio runtime.
///
/// # Panics
///
/// Panics if the HTTP client cannot be built, or if [`get_proxy_by_page`]
/// panics while fetching a page.
#[tokio::main]
pub async fn get_proxy_list(proxy_size: usize) -> Vec<ProxyObj> {
    // Browser-like request headers sent with every page request.
    const DEFAULT_HEADERS: [(&str, &str); 6] = [
        ("Upgrade-Insecure-Requests", "1"),
        (
            "User-Agent",
            "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36",
        ),
        (
            "Accept",
            "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        ),
        ("Referer", "http://www.ip3366.net/"),
        ("Accept-Encoding", "gzip, deflate, sdch"),
        ("Accept-Language", "zh-CN,zh;q=0.8"),
    ];

    let mut headers = header::HeaderMap::new();
    for &(name, value) in DEFAULT_HEADERS.iter() {
        headers.insert(name, header::HeaderValue::from_static(value));
    }

    let client = Client::builder()
        .default_headers(headers)
        .referer(true)
        .build()
        .expect("failed to build the HTTP client");

    let mut proxy_list: Vec<ProxyObj> = Vec::new();
    let mut page_no: u8 = 1;
    while proxy_list.len() < proxy_size {
        let mut page = get_proxy_by_page(client.clone(), page_no).await;
        if page.is_empty() {
            // An empty page means there is nothing more to collect; asking
            // for further pages would loop without making progress.
            break;
        }
        proxy_list.append(&mut page);

        // The page number is a `u8`; stop instead of overflowing past 255.
        page_no = match page_no.checked_add(1) {
            Some(next) => next,
            None => break,
        };
    }
    proxy_list
}

/// Fetches one listing page from `www.ip3366.net` and extracts the proxies
/// found in its table rows.
///
/// The response body is gunzipped when it starts with the gzip magic bytes
/// and used as-is otherwise, then decoded as GBK and parsed as HTML. For each
/// `tbody tr` row the cells are read by position: cell 0 is the IP address,
/// cell 1 the port and cell 4 the protocol. Cell text is trimmed of
/// surrounding whitespace.
///
/// Rows that have fewer than five cells, an empty IP address, or a port that
/// does not parse are skipped instead of aborting the whole page.
///
/// # Parameters
///
/// * `client` - HTTP client used to perform the request.
/// * `page_no` - page number inserted into the `page` query parameter.
///
/// # Returns
///
/// The proxies found on the page, in document order. Empty when the page
/// contains no usable rows.
///
/// # Panics
///
/// Panics if the request fails, the body cannot be read, or a body that
/// claims to be gzip cannot be decompressed.
pub async fn get_proxy_by_page(client: Client, page_no: u8) -> Vec<ProxyObj> {
    // First two bytes of every gzip stream.
    const GZIP_MAGIC: [u8; 2] = [0x1f, 0x8b];

    let url = format!("http://www.ip3366.net/?stype=1&page={}", page_no);
    let resp = client
        .get(url)
        .send()
        .await
        .expect("request to the proxy listing page failed");
    let body = resp
        .bytes()
        .await
        .expect("failed to read the proxy listing response body");
    let raw: &[u8] = &body;

    // The request advertises several encodings, so the server is not obliged
    // to answer with gzip. Only run the GzDecoder when the payload really is
    // a gzip stream; otherwise treat the bytes as the page itself.
    let mut inflated = Vec::new();
    let data: &[u8] = if raw.starts_with(&GZIP_MAGIC) {
        GzDecoder::new(raw)
            .read_to_end(&mut inflated)
            .expect("failed to decompress the gzip response body");
        &inflated
    } else {
        raw
    };

    // The site serves GBK-encoded pages; malformed sequences are replaced
    // rather than treated as fatal.
    let (html, _encoding_used, _had_errors) = GBK.decode(data);
    let doc = Html::parse_fragment(&html);

    let tbody_tr_selector =
        Selector::parse("tbody tr").expect("static selector `tbody tr` is valid");
    let td_selector = Selector::parse("td").expect("static selector `td` is valid");

    let mut proxy_list: Vec<ProxyObj> = Vec::new();
    for tr in doc.select(&tbody_tr_selector) {
        // Plain text of each cell, without nested markup or padding.
        let mut cells = tr
            .select(&td_selector)
            .map(|td| td.text().collect::<String>().trim().to_owned());

        // Cell 0: IP address.
        let ip_addr = match cells.next() {
            Some(text) if !text.is_empty() => text,
            _ => continue,
        };

        // Cell 1: port. A malformed value only invalidates this row.
        let port_text = match cells.next() {
            Some(text) => text,
            None => continue,
        };
        let port = match port_text.parse() {
            Ok(value) => value,
            Err(_) => continue,
        };

        // Cell 4: protocol. Two cells were consumed above, so `nth(2)` skips
        // cells 2 and 3 and yields cell 4.
        let protocols = match cells.nth(2) {
            Some(text) => text,
            None => continue,
        };

        // NOTE(review): a latency filter based on `ip_util::check_ip` was
        // sketched here previously but is disabled; every parsed row is kept.
        proxy_list.push(ProxyObj::new(protocols, ip_addr, port));
    }
    proxy_list
}