story-dl 0.1.0

Story web scraping
Documentation
use {
    http_req::{
        request::{Method, Request},
        uri::Uri,
    },
    libflate::{deflate, gzip},
    rand::Rng,
    std::{io, thread, time},
};

/// `User-Agent` header value sent with every request (64-bit Linux desktop flavour).
///
/// Exactly one `USER_AGENT` definition is compiled in, selected by the target
/// OS/architecture. Every variant embeds the crate version and a contact address.
#[cfg(all(target_os = "linux", target_arch = "x86_64"))]
const USER_AGENT: &str = concat!(
    "Mozilla/5.0 (X11; Linux x86_64; rv:63.0) Servo/1.0 Firefox/63.0 StoryDL/",
    env!("CARGO_PKG_VERSION"),
    " (txuritan@protonmail.com)"
);
/// `User-Agent` header value for Linux on any architecture other than x86_64.
#[cfg(all(target_os = "linux", not(target_arch = "x86_64")))]
const USER_AGENT: &str = concat!(
    "Mozilla/5.0 (X11; Linux i686; rv:63.0) Servo/1.0 Firefox/63.0 StoryDL/",
    env!("CARGO_PKG_VERSION"),
    " (txuritan@protonmail.com)"
);

/// `User-Agent` header value for 64-bit Windows.
#[cfg(all(target_os = "windows", target_arch = "x86_64"))]
const USER_AGENT: &str = concat!(
    "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:63.0) Servo/1.0 Firefox/63.0 StoryDL/",
    env!("CARGO_PKG_VERSION"),
    " (txuritan@protonmail.com)"
);
/// `User-Agent` header value for Windows on any architecture other than x86_64.
#[cfg(all(target_os = "windows", not(target_arch = "x86_64")))]
const USER_AGENT: &str = concat!(
    "Mozilla/5.0 (Windows NT 6.1; rv:63.0) Servo/1.0 Firefox/63.0 StoryDL/",
    env!("CARGO_PKG_VERSION"),
    " (txuritan@protonmail.com)"
);

/// `User-Agent` header value for Android.
#[cfg(target_os = "android")]
const USER_AGENT: &str = concat!(
    "Mozilla/5.0 (Android; Mobile; rv:63.0) Servo/1.0 Firefox/63.0 StoryDL/",
    env!("CARGO_PKG_VERSION"),
    " (txuritan@protonmail.com)"
);
/// `User-Agent` header value for iOS.
#[cfg(target_os = "ios")]
const USER_AGENT: &str = concat!(
    "Mozilla/5.0 (iPhone; CPU iPhone OS 8_3 like Mac OS X; rv:63.0) Servo/1.0 Firefox/63.0 StoryDL/",
    env!("CARGO_PKG_VERSION"),
    " (txuritan@protonmail.com)"
);

/// Fallback `User-Agent` header value for every other target.
// None of the platforms handled above, so maybe OS X, and if not then OS X is an
// okay fallback. Android and iOS must be excluded explicitly: their `target_os`
// is neither "linux" nor "windows", so without the exclusion `USER_AGENT` would
// be defined twice on those targets.
#[cfg(not(any(
    target_os = "linux",
    target_os = "windows",
    target_os = "android",
    target_os = "ios"
)))]
const USER_AGENT: &str = concat!(
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:63.0) Servo/1.0 Firefox/63.0 StoryDL/",
    env!("CARGO_PKG_VERSION"),
    " (txuritan@protonmail.com)"
);

/// Performs a browser-like `GET` request for `url` and returns the response body.
///
/// The raw response body is collected into a buffer. If the response carries a
/// `Content-Encoding` of `gzip` or `deflate`, the buffer is decoded before it is
/// returned; a missing header or `identity` returns the buffer untouched.
///
/// # Errors
///
/// - [`RequestError::HttpReq`] if sending the request fails.
/// - [`RequestError::Non200Response`] if the status code is not a success code.
/// - [`RequestError::InvalidEncoding`] if the `Content-Encoding` is anything
///   other than `gzip`, `deflate` or `identity`.
/// - [`RequestError::Io`] if decoding the body fails.
pub(crate) fn request(url: &Uri) -> Result<Vec<u8>, RequestError> {
    let mut raw: Vec<u8> = Vec::with_capacity(1_000);

    let res = Request::new(url)
        .method(Method::GET)
        .header(
            "Accept",
            "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        )
        // FanFiction.net has a broken GZIP encoding (Unexpected GZIP ID: value=[50, 56], expected=[31, 139])
        // They will still send GZIP even if "identity" is used, but at least it's not broken
        .header("Accept-Encoding", "identity")
        .header("Accept-Language", "en-US,en;q=0.5")
        .header("User-Agent", USER_AGENT)
        .send(&mut raw)?;

    if !res.status_code().is_success() {
        return Err(RequestError::Non200Response);
    }

    // Content-coding names are case-insensitive, so normalise the header value
    // before comparing it against the codings we know how to decode.
    let encoding = res
        .headers()
        .get("Content-Encoding")
        .map(|value| value.trim().to_ascii_lowercase());

    match encoding.as_ref().map(String::as_str) {
        // "identity" means the body is not encoded at all; we explicitly ask for
        // it above, so a server echoing it back must not be treated as an error.
        None | Some("identity") => Ok(raw),
        Some("deflate") => decode_to_vec(deflate::Decoder::new(&raw[..]), raw.len()),
        Some("gzip") => decode_to_vec(gzip::Decoder::new(&raw[..])?, raw.len()),
        Some(_) => Err(RequestError::InvalidEncoding),
    }
}

/// Drains `decoder` into a fresh buffer pre-sized to `size_hint` bytes.
fn decode_to_vec<R: io::Read>(mut decoder: R, size_hint: usize) -> Result<Vec<u8>, RequestError> {
    let mut body = Vec::with_capacity(size_hint);

    io::copy(&mut decoder, &mut body)?;

    Ok(body)
}

#[derive(Debug)]
pub(crate) enum RequestError {
    Io { err: std::io::Error },

    InvalidEncoding,
    Non200Response,
    Http { err: http::Error },
    HttpReq { err: http_req::error::Error },
}

/// Renders each variant as `(<VariantName>) <message>`.
///
/// Variants that wrap another error print that error's own `Display` output as
/// the message; the unit variants print a fixed description.
impl std::fmt::Display for RequestError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            RequestError::Io { err } => write!(f, "(Io) {}", err),

            RequestError::InvalidEncoding => write!(
                f,
                "(InvalidEncoding) Server returned with an encoding that the scraper can't handle"
            ),
            RequestError::Non200Response => write!(
                f,
                "(Non200Response) Server returned with a non 200 status code"
            ),
            RequestError::Http { err } => write!(f, "(Http) {}", err),
            RequestError::HttpReq { err } => write!(f, "(HttpReq) {}", err),
        }
    }
}

/// Wraps a standard I/O error in [`RequestError::Io`], which lets `?` convert it
/// automatically inside functions returning `RequestError`.
impl From<std::io::Error> for RequestError {
    fn from(source: std::io::Error) -> Self {
        RequestError::Io { err: source }
    }
}

impl From<http::Error> for RequestError {
    fn from(err: http::Error) -> RequestError {
        RequestError::Http { err }
    }
}

impl From<http_req::error::Error> for RequestError {
    fn from(err: http_req::error::Error) -> RequestError {
        RequestError::HttpReq { err }
    }
}

/// Exposes the wrapped error, if any, as the source of a `RequestError`.
impl std::error::Error for RequestError {
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match self {
            // Variants that wrap a lower-level error hand it out as the cause.
            RequestError::Io { err } => Some(err),
            RequestError::Http { err } => Some(err),
            RequestError::HttpReq { err } => Some(err),

            // These variants originate in this module and have no underlying cause.
            RequestError::InvalidEncoding | RequestError::Non200Response => None,
        }
    }
}

/// Blocks the current thread for a randomly chosen number of whole seconds.
///
/// The duration is drawn with `gen_range(5, 10)` from the thread-local RNG and
/// logged at `info` level before sleeping.
pub(crate) fn sleep() {
    let mut rng = rand::thread_rng();
    let seconds: u64 = rng.gen_range(5, 10);

    log::info!("[util] Sleeping for {} seconds", seconds);

    let pause = time::Duration::from_secs(seconds);
    thread::sleep(pause);
}

/// Counts the whitespace-separated words in `str`, skipping tokens that are
/// purely markup.
///
/// A token is skipped when it is exactly `---`, a run of one to six `#`
/// characters, `*`, or `**`. Every other token counts as one word, including
/// tokens that merely contain those characters (e.g. `**bold**`).
pub(crate) fn word_count(str: &str) -> u32 {
    // Stand-alone tokens that are formatting rather than words.
    const MARKUP_TOKENS: [&str; 9] = [
        "---", "#", "##", "###", "####", "#####", "######", "*", "**",
    ];

    let mut words = 0usize;
    for token in str.split_whitespace() {
        if !MARKUP_TOKENS.contains(&token) {
            words += 1;
        }
    }

    words as u32
}