use std::io::Read;
use encoding_rs::GBK;
use flate2::read::GzDecoder;
use reqwest::{Client, header};
use scraper::{Html, Selector};
use crate::{proxy_object::ProxyObj, ip_util};
/// Collects at least `proxy_size` proxies from ip3366.net, fetching listing pages in order
/// starting at page 1.
///
/// Whole pages are appended, so the returned list may hold more than `proxy_size` entries.
/// It may hold fewer when the site runs out of data: collection stops as soon as a page
/// yields no proxies, or once the last page number representable as `u8` has been fetched.
/// At least one page is always requested, even when `proxy_size` is 0.
///
/// Because of `#[tokio::main]`, this is a blocking, synchronous function that starts its own
/// Tokio runtime; it must not be called from inside an already running runtime.
///
/// # Panics
///
/// Panics if the HTTP client cannot be built, and propagates any panic raised by
/// [`get_proxy_by_page`].
#[tokio::main]
pub async fn get_proxy_list(proxy_size: usize) -> Vec<ProxyObj> {
    // Browser-like request headers sent with every page request.
    // NOTE(review): `deflate` and `sdch` are advertised here, but `get_proxy_by_page` only
    // knows how to gunzip a response body — confirm the site never answers with those.
    const DEFAULT_HEADERS: [(&str, &str); 6] = [
        ("Upgrade-Insecure-Requests", "1"),
        (
            "User-Agent",
            "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36",
        ),
        (
            "Accept",
            "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        ),
        ("Referer", "http://www.ip3366.net/"),
        ("Accept-Encoding", "gzip, deflate, sdch"),
        ("Accept-Language", "zh-CN,zh;q=0.8"),
    ];

    let mut headers = header::HeaderMap::new();
    for &(name, value) in DEFAULT_HEADERS.iter() {
        headers.insert(name, header::HeaderValue::from_static(value));
    }

    let client = Client::builder()
        .default_headers(headers)
        .referer(true)
        .build()
        .expect("failed to build the HTTP client");

    let mut proxy_list: Vec<ProxyObj> = Vec::new();
    let mut page_no: u8 = 1;
    loop {
        let mut data_list = get_proxy_by_page(client.clone(), page_no).await;
        // An empty page means there is nothing left to collect; without this check the loop
        // would spin forever whenever the site offers fewer than `proxy_size` proxies.
        if data_list.is_empty() {
            break;
        }
        proxy_list.append(&mut data_list);
        if proxy_list.len() >= proxy_size {
            break;
        }
        // The page number is a `u8`; stop instead of overflowing past page 255.
        page_no = match page_no.checked_add(1) {
            Some(next) => next,
            None => break,
        };
    }
    proxy_list
}
/// Fetches one listing page from ip3366.net and extracts the proxies from its table.
///
/// The page `http://www.ip3366.net/?stype=1&page={page_no}` is requested with `client`. The
/// body is gunzipped when it starts with the gzip magic bytes, then decoded as GBK and parsed
/// as HTML. Every `tbody tr` row is read as: cell 0 = IP address, cell 1 = port,
/// cell 4 = protocol, with surrounding whitespace trimmed from each cell.
///
/// Rows that lack any of those cells, or whose port is not a valid number, are skipped
/// rather than aborting the whole page. The result is empty when the page has no usable row.
///
/// # Panics
///
/// Panics if the request cannot be sent, the response body cannot be read, or a body that
/// claims to be gzip cannot be decompressed.
pub async fn get_proxy_by_page(client: Client, page_no: u8) -> Vec<ProxyObj> {
    // First two bytes of every gzip stream (RFC 1952).
    const GZIP_MAGIC: [u8; 2] = [0x1f, 0x8b];

    let url = format!("http://www.ip3366.net/?stype=1&page={}", page_no);
    let resp = client
        .get(url)
        .send()
        .await
        .expect("request to ip3366.net failed");
    let body = resp
        .bytes()
        .await
        .expect("failed to read the response body");

    // The server may answer uncompressed, or the HTTP client may already have decompressed
    // the body; only run the gzip decoder when the payload really is a gzip stream.
    let mut inflated = Vec::new();
    let data: &[u8] = if body.starts_with(&GZIP_MAGIC) {
        GzDecoder::new(&body[..])
            .read_to_end(&mut inflated)
            .expect("failed to decompress the gzip response body");
        &inflated[..]
    } else {
        &body[..]
    };

    let (html, _encoding_used, _had_errors) = GBK.decode(data);
    // The response is a complete page, so parse it as a document rather than a fragment.
    let doc = Html::parse_document(&html);
    let tbody_tr_selector = Selector::parse("tbody tr").expect("static selector is valid");
    let td_selector = Selector::parse("td").expect("static selector is valid");

    let mut proxy_list: Vec<ProxyObj> = Vec::new();
    for tr in doc.select(&tbody_tr_selector) {
        let mut cells = tr
            .select(&td_selector)
            .map(|td| td.inner_html().trim().to_owned());
        let ip_addr = cells.next();
        let port_text = cells.next();
        // Cells 0 and 1 are consumed above; `nth(2)` skips cells 2 and 3 and yields cell 4.
        let protocols = cells.nth(2);

        let (ip_addr, port_text, protocols) = match (ip_addr, port_text, protocols) {
            (Some(ip), Some(port), Some(proto)) => (ip, port, proto),
            // Not a data row (too few cells): ignore it.
            _ => continue,
        };
        let port = match port_text.parse() {
            Ok(port) => port,
            // A malformed port must not take down the whole scrape.
            Err(_) => continue,
        };
        proxy_list.push(ProxyObj::new(protocols, ip_addr, port));
    }
    proxy_list
}