proxyhunt 0.1.1

A fast, modern proxy scraper and checker
use anyhow::Result;
use reqwest::Client;
use std::fs;
use crate::proxy::Proxy;
use tracing::{info, warn};

/// Collects proxies from remote URLs and local files.
///
/// Holds a single `reqwest::Client` that is reused for every HTTP request the
/// scraper makes.
#[derive(Debug, Clone)]
pub struct Scraper {
    /// HTTP client used to download remote proxy lists.
    client: Client,
}

impl Scraper {
    /// Creates a scraper whose HTTP client sends the `proxyhunt/0.1.0` user agent.
    ///
    /// # Panics
    ///
    /// Panics if `Client::builder().build()` returns an error.
    pub fn new() -> Self {
        // NOTE(review): the version in the user agent is hard-coded; confirm it
        // still matches the crate version.
        let client = Client::builder()
            .user_agent("proxyhunt/0.1.0")
            .build()
            .expect("failed to build the reqwest HTTP client");

        Self { client }
    }

    /// Scrapes every entry of `sources`, one after another, and returns the
    /// de-duplicated proxies.
    ///
    /// A source that fails is reported with `warn!` and skipped, so a single
    /// bad source never aborts the whole run. The total number of unique
    /// proxies is logged with `info!` before returning.
    pub async fn scrape_all(&self, sources: &[String]) -> Vec<Proxy> {
        let mut all_proxies = Vec::new();

        for source in sources {
            match self.scrape_source(source).await {
                Ok(proxies) => all_proxies.extend(proxies),
                Err(e) => warn!("Failed to scrape source {}: {}", source, e),
            }
        }

        let unique_proxies = all_proxies.collect_unique();
        info!("Scraped total {} unique proxies", unique_proxies.len());
        unique_proxies
    }

    /// Loads one source and parses its text with `Proxy::parse_multiple`.
    ///
    /// A `source` starting with `http://` or `https://` is downloaded with the
    /// scraper's HTTP client; anything else is treated as a local file path.
    ///
    /// # Errors
    ///
    /// Returns an error when the request cannot be sent, when the server
    /// answers with a 4xx/5xx status, when the response body cannot be read as
    /// text, or when the local file cannot be read.
    pub async fn scrape_source(&self, source: &str) -> Result<Vec<Proxy>> {
        let text = if source.starts_with("http://") || source.starts_with("https://") {
            self.client
                .get(source)
                .send()
                .await?
                // Reject 4xx/5xx responses: otherwise an error page would be
                // parsed as if it were a proxy list and the failure would go
                // unreported.
                .error_for_status()?
                .text()
                .await?
        } else {
            // NOTE(review): this is a blocking read inside an async fn; consider
            // an async file API if large files or slow disks are expected.
            fs::read_to_string(source)?
        };

        Ok(Proxy::parse_multiple(&text))
    }
}

impl Default for Scraper {
    /// Equivalent to [`Scraper::new`].
    fn default() -> Self {
        Self::new()
    }
}

/// Private extension trait that lets a collection of proxies be reduced to
/// its distinct entries with a method call.
trait UniqueExt {
    /// Consumes `self` and returns the resulting proxies as a `Vec<Proxy>`.
    ///
    /// The implementation for `Vec<Proxy>` in this file keeps the first
    /// occurrence of each proxy and drops later duplicates.
    fn collect_unique(self) -> Vec<Proxy>;
}

impl UniqueExt for Vec<Proxy> {
    /// Drops repeated proxies, keeping the first occurrence of each one in its
    /// original position.
    ///
    /// Two proxies count as the same entry when their `proto`, `host`, `port`,
    /// `user` and `pass` fields are all equal.
    fn collect_unique(self) -> Vec<Proxy> {
        use std::collections::HashSet;

        let mut seen = HashSet::new();
        self.into_iter()
            .filter(|proxy| {
                // `insert` returns `false` when the identity tuple was already
                // recorded, which filters the duplicate out.
                seen.insert((
                    proxy.proto,
                    proxy.host.clone(),
                    proxy.port,
                    proxy.user.clone(),
                    proxy.pass.clone(),
                ))
            })
            .collect()
    }
}