cloudflare-bypasser 0.1.5

A crate to bypass Cloudflare's anti-bot page, inspired by the Python module [cloudflare-scrape](https://github.com/Anorov/cloudflare-scrape).
Documentation
extern crate base64;
extern crate fake_useragent;
extern crate regex;
extern crate reqwest;
extern crate url;

// --- std ---
use std::time::Duration;
// --- external ---
use reqwest::{
    ClientBuilder,
    header::{COOKIE, REFERER, SET_COOKIE, USER_AGENT, HeaderValue},
};

/// Configuration and HTTP client used to get past a Cloudflare challenge page.
///
/// Instances are configured through the chained setters (`wait`, `retry`,
/// `proxy`, `user_agent`, `random_user_agent`) and then driven with `bypass`.
pub struct Bypasser<'a> {
    // Seconds slept at the start of every `bypass` call.
    wait: u8,
    // Maximum number of attempts when submitting the challenge answer.
    retry: u32,
    // Optional proxy address, handed to `reqwest::Proxy::all` when the client is built.
    proxy: Option<&'a str>,
    // Value sent in the `User-Agent` header; replaced by a random one on each
    // `bypass` call when `user_agents` is set.
    user_agent: String,
    // HTTP client; rebuilt from the current settings before the challenge page is requested.
    client: reqwest::Client,
    // Pool of random user agents; `Some` only after `random_user_agent(true)`.
    user_agents: Option<fake_useragent::UserAgents>,
}

impl<'a> Bypasser<'a> {
    /// Creates a bypasser with no wait, a single attempt, no proxy, an empty
    /// user agent and random user agents disabled.
    ///
    /// # Panics
    ///
    /// Panics if the default `reqwest` client cannot be built.
    pub fn new() -> Bypasser<'a> {
        Bypasser {
            wait: 0,
            retry: 1,
            proxy: None,
            user_agent: String::new(),
            // Placeholder client: `bypass` rebuilds it (proxy included) before any request.
            // NOTE(security): certificate and hostname verification are disabled here.
            client: ClientBuilder::new()
                .danger_accept_invalid_certs(true)
                .danger_accept_invalid_hostnames(true)
                .gzip(true)
                .build()
                .expect("failed to build the default reqwest client"),
            user_agents: None,
        }
    }

    /// Sets how many seconds `bypass` sleeps before it starts working.
    pub fn wait(mut self, secs: u8) -> Self {
        self.wait = secs;
        self
    }

    /// Sets the `User-Agent` header value sent with every request.
    ///
    /// The value is overridden on each `bypass` call while random user agents
    /// are enabled.
    pub fn user_agent(mut self, user_agent: &str) -> Self {
        self.user_agent = user_agent.to_owned();
        self
    }

    /// Enables (`true`) or disables (`false`) picking a random Chrome, Firefox
    /// or Safari user agent on every `bypass` call.
    pub fn random_user_agent(mut self, flag: bool) -> Self {
        self.user_agents = if flag {
            Some(fake_useragent::UserAgentsBuilder::new()
                .cache(false)
                .set_browsers(fake_useragent::Browsers::new()
                    .set_chrome()
                    .set_firefox()
                    .set_safari())
                .build())
        } else {
            // Passing `false` must undo an earlier `random_user_agent(true)`.
            None
        };
        self
    }

    /// Routes all requests through the proxy at `address`.
    ///
    /// The address is validated lazily: an invalid one makes `bypass` return an error.
    pub fn proxy(mut self, address: &'a str) -> Self {
        self.proxy = Some(address);
        self
    }

    /// Sets the maximum number of attempts for each network step of `bypass`.
    ///
    /// A value of `0` is treated as `1`, so at least one attempt is always made.
    pub fn retry(mut self, times: u32) -> Self {
        self.retry = times;
        self
    }

    /// Rebuilds `self.client` from the current settings (no redirects, 30 s
    /// timeout, optional proxy).
    ///
    /// Returns an error instead of panicking when the proxy address is invalid
    /// or the client cannot be constructed.
    fn build_client(&mut self) -> Result<(), &'static str> {
        // NOTE(security): certificate and hostname verification are disabled here.
        let mut client_builder = ClientBuilder::new()
            .danger_accept_invalid_certs(true)
            .danger_accept_invalid_hostnames(true)
            .gzip(true)
            // Redirects are not followed so the challenge response itself is observed.
            .redirect(reqwest::RedirectPolicy::none())
            .timeout(Duration::from_secs(30));
        if let Some(address) = self.proxy {
            let proxy = reqwest::Proxy::all(address).map_err(|_| "invalid proxy address")?;
            client_builder = client_builder.proxy(proxy);
        }
        self.client = client_builder
            .build()
            .map_err(|_| "failed to build the HTTP client")?;

        Ok(())
    }

    /// Extracts the hidden form fields `s`, `jschl_vc` and `pass` from the
    /// challenge page as `(name, value)` pairs, in document order.
    fn parse_challenge(html: &str) -> Vec<(String, String)> {
        regex::Regex::new(r#"name="(s|jschl_vc|pass)"(?: [^<>]*)? value="(.+?)""#)
            .expect("hidden-field pattern is a valid constant regex")
            .captures_iter(html)
            .map(|caps| (caps[1].to_owned(), caps[2].to_owned()))
            .collect()
    }

    /// Builds a Node.js program that evaluates the page's challenge script in
    /// a fresh `vm` context and prints the resulting `a.value`.
    ///
    /// The challenge is embedded base64-encoded so that it needs no escaping
    /// inside the generated program. Returns an error when `html` does not
    /// contain a recognisable challenge script.
    fn parse_js(html: &str, domain: &str) -> Result<String, &'static str> {
        // --- external ---
        use regex::Regex;

        let challenge_re = Regex::new(r#"setTimeout\(function\(\)\{\s+(var s,t,o,p,b,r,e,a,k,i,n,g,f.+?\r?\n[\s\S]+?a\.value =.+?)\r?\n"#)
            .expect("challenge pattern is a valid constant regex");
        let challenge_caps = challenge_re
            .captures(html)
            .ok_or("challenge script not found in page")?;
        let challenge = &challenge_caps[1];

        // The backslash before each quote is optional: the previous pattern required
        // it and therefore never matched ordinary `id="cf-dn-..."` markup.
        let inner_html_re = Regex::new(r#"<div(?: [^<>]*)? id=\\?"cf-dn.*?\\?">([^<>]*)"#)
            .expect("inner-html pattern is a valid constant regex");
        let inner_html = inner_html_re
            .captures(html)
            .map(|caps| caps[1].to_owned())
            .unwrap_or_default();

        let challenge = base64::encode(&format!(
            r#"
                var document = {{
                    createElement: function () {{
                        return {{ firstChild: {{ href: "http://{}/" }} }}
                    }},
                    getElementById: function () {{
                        return {{"innerHTML": "{}"}};
                    }}
                }};
                {}; a.value
            "#,
            domain,
            inner_html,
            challenge
        ));

        Ok(format!(
            r#"
                var atob = Object.setPrototypeOf(function (str) {{
                    try {{
                        return Buffer.from("" + str, "base64").toString("binary");
                    }} catch (e) {{}}
                }}, null);
                var challenge = atob("{}");
                var context = Object.setPrototypeOf({{ atob: atob }}, null);
                var options = {{
                    filename: "iuam-challenge.js",
                    contextOrigin: "cloudflare:iuam-challenge.js",
                    contextCodeGeneration: {{ strings: true, wasm: false }},
                    timeout: 5000
                }};
                process.stdout.write(String(
                    require("vm").runInNewContext(challenge, context, options)
                ));
            "#,
            challenge
        ))
    }

    /// Runs `js` with `node -e` and returns everything it wrote to stdout.
    ///
    /// Returns an error when `node` cannot be started, exits unsuccessfully,
    /// or prints bytes that are not valid UTF-8.
    fn run_js(js: &str) -> Result<String, &'static str> {
        let output = std::process::Command::new("node")
            .args(&["-e", js])
            .output()
            .map_err(|_| "failed to run `node`")?;
        // A failed run leaves stdout empty; submitting that as the answer is pointless.
        if !output.status.success() {
            return Err("`node` failed to evaluate the challenge");
        }

        String::from_utf8(output.stdout).map_err(|_| "`node` output is not valid UTF-8")
    }

    /// Fetches `url` and returns `(body, final url, Set-Cookie header)`.
    ///
    /// The client is rebuilt first and, when random user agents are enabled, a
    /// new user agent is picked. Failed requests are retried up to the
    /// configured number of attempts instead of looping forever.
    fn request_challenge(&mut self, url: &str) -> Result<(String, String, HeaderValue), &'static str> {
        self.build_client()?;
        if let Some(ref user_agents) = self.user_agents { self.user_agent = user_agents.random().to_string(); }

        for _ in 0..self.retry.max(1) {
            match self.client
                .get(url)
                .header(USER_AGENT, self.user_agent.as_str())
                .send() {
                Ok(mut resp) => match resp.text() {
                    Ok(text) => {
                        // Indexing the header map would panic on pages that set no cookie.
                        let cookie = resp
                            .headers()
                            .get(SET_COOKIE)
                            .cloned()
                            .ok_or("challenge response has no Set-Cookie header")?;

                        return Ok((text, resp.url().as_str().to_owned(), cookie));
                    }
                    Err(e) => println!("At request_challenge() text(), {:?}", e),
                },
                Err(e) => println!("At, request_challenge() send(), {:?}", e),
            }
        }

        Err("reach max retries")
    }

    /// Submits the challenge answer to `url` and returns the `Set-Cookie`
    /// header of the first response that carries one, together with the user
    /// agent that was used.
    ///
    /// Makes at most `max(retry, 1)` attempts; the previous equality check
    /// never terminated for `retry == 0`.
    fn solve_challenge(&mut self, url: &str, cookie: &HeaderValue, referer: &str, params: &[(String, String)]) -> Result<(HeaderValue, HeaderValue), &'static str> {
        // Validate once up front rather than panicking after a successful request.
        let user_agent: HeaderValue = self
            .user_agent
            .parse()
            .map_err(|_| "user agent is not a valid header value")?;

        for _ in 0..self.retry.max(1) {
            match self.client
                .get(url)
                .header(COOKIE, cookie)
                .header(REFERER, referer)
                .header(USER_AGENT, self.user_agent.as_str())
                .query(params)
                .send() {
                Ok(resp) => if let Some(clearance) = resp.headers().get(SET_COOKIE) {
                    return Ok((clearance.clone(), user_agent));
                },
                Err(e) => println!("{:?}", e),
            }
        }

        Err("reach max retries")
    }

    /// Solves the challenge page served at `url`.
    ///
    /// On success returns `(cookie, user_agent)`: the `Set-Cookie` header
    /// obtained by submitting the answer and the `User-Agent` value that was
    /// used for the requests.
    ///
    /// # Errors
    ///
    /// Returns an error when `url` is invalid or has no domain name, the proxy
    /// or user agent is invalid, the page is not a recognisable challenge,
    /// `node` cannot evaluate it, or the configured attempts are exhausted.
    ///
    /// NOTE(review): the configured delay is slept *before* the challenge page
    /// is requested; the page wraps its script in `setTimeout`, so the delay
    /// may be intended to precede the answer submission instead — confirm.
    pub fn bypass(&mut self, url: &str) -> Result<(HeaderValue, HeaderValue), &str> {
        // Reject unusable URLs before sleeping or touching the network.
        let (challenge_url, domain) = {
            let parsed = url::Url::parse(url).map_err(|_| "invalid url")?;
            let domain = parsed.domain().ok_or("url has no domain name")?.to_owned();

            (format!("{}://{}/cdn-cgi/l/chk_jschl", parsed.scheme(), domain), domain)
        };

        std::thread::sleep(Duration::from_secs(u64::from(self.wait)));

        let (html, referer, cookie) = self.request_challenge(url)?;
        let params = {
            let mut p = Bypasser::parse_challenge(&html);
            let answer = Bypasser::run_js(&Bypasser::parse_js(&html, &domain)?)?;
            p.push((String::from("jschl_answer"), answer));

            p
        };

        self.solve_challenge(&challenge_url, &cookie, &referer, &params)
    }
}