story-dl 0.2.0

Story web scraping
Documentation
use {
    chrono::Local,
    clap::{App, Arg},
    log::{Level, Log, Metadata, Record},
    std::{
        fmt, fs,
        io::{self, Write},
    },
    story_dl::{archive_of_our_own, fanfiction, models::Story, Uri},
};

#[actix_rt::main]
async fn main() -> Result<(), Error> {
    log::set_boxed_logger(Box::new(Logger::new(log::Level::Info)?))?;
    log::set_max_level(log::Level::Info.to_level_filter());

    let matches = App::new(clap::crate_name!())
        .author(clap::crate_authors!())
        .about(clap::crate_description!())
        .version(clap::crate_version!())
        .arg(
            Arg::with_name("file")
                .short("f")
                .value_name("FILE")
                .takes_value(true)
                .conflicts_with("url")
                .help("File to use as a scraping list"),
        )
        .arg(
            Arg::with_name("url")
                .short("u")
                .value_name("URL")
                .takes_value(true)
                .help("URL of the story to download"),
        )
        .arg(
            Arg::with_name("output")
                .short("o")
                .value_name("FORMAT")
                .possible_values(&Output::variants())
                .default_value("epub")
                .required(true)
                .help("Output format of the downloaded story"),
        )
        .get_matches();

    let output = matches.value_of("output").unwrap().parse::<Output>()?;

    if matches.is_present("file") {
        let file = matches.value_of("file").unwrap();

        let imports = serde_json::from_slice::<Import>(&fs::read(file)?[..])?;

        for import in imports {
            let url: Uri = match import {
                Element::Text(url) => url,
                Element::Url { url } => url,
            }
            .parse()?;

            let story = scrape(&url).await?;

            export(story, output)?;
        }
    } else if matches.is_present("url") {
        let url: Uri = matches.value_of("url").unwrap().parse()?;

        let story = scrape(&url).await?;

        export(story, output)?;
    }

    Ok(())
}

/// File formats a downloaded story can be exported to.
#[derive(Copy, Clone)]
enum Output {
    EPub,
    Json,
    MessagePack,
}

impl Output {
    /// Command-line names of all formats, in declaration order.
    fn variants() -> [&'static str; 3] {
        const NAMES: [&str; 3] = ["epub", "json", "message-pack"];
        NAMES
    }

    /// File extension (without the leading dot) used for files in this format.
    fn file_extension(self) -> &'static str {
        match self {
            Self::Json => "json",
            Self::MessagePack => "msgpk",
            Self::EPub => "epub",
        }
    }
}

impl std::str::FromStr for Output {
    type Err = Error;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        match s {
            "epub" => Ok(Output::EPub),
            "json" => Ok(Output::Json),
            "message-pack" => Ok(Output::MessagePack),
            _ => Err(Error::ParseOutput),
        }
    }
}

/// Renders the format using the same name the command line accepts.
impl fmt::Display for Output {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let name = match *self {
            Output::EPub => "epub",
            Output::Json => "json",
            Output::MessagePack => "message-pack",
        };

        f.write_str(name)
    }
}
async fn scrape(url: &Uri) -> Result<Story, Error> {
    let site = Site::from_url(url).expect("Given url doesn't match any known site");

    let story = match site {
        Site::ArchiveOfOurOwn => archive_of_our_own::scrape(url).await?,
        Site::FanFiction => fanfiction::scrape(url).await?,
    };

    Ok(story)
}

/// Serializes `story` in the requested `output` format and writes it to a file in the
/// current working directory.
///
/// The file is named `<story name> - <authors joined by ", ">.<extension>`, with every
/// character that cannot appear in a single file name replaced by `_`.
///
/// # Errors
///
/// Returns an error when the EPUB cannot be built, when JSON or MessagePack
/// serialization fails, or when the output file cannot be written.
fn export(story: Story, output: Output) -> Result<(), Error> {
    // Presumably the story's word count; only used as an initial capacity hint.
    let mut buffer = Vec::with_capacity(story.words as usize);

    match output {
        Output::EPub => {
            use {
                comrak::{markdown_to_html, ComrakOptions},
                epub_builder::{EpubBuilder, EpubContent, ZipLibrary},
            };

            let options = ComrakOptions::default();

            let mut epub = EpubBuilder::new(ZipLibrary::new()?)?;

            epub.metadata("title", &story.name)?;
            epub.metadata("author", story.authors.join(", "))?;
            epub.metadata("description", &story.summary)?;

            for (i, chapter) in story.chapters.iter().enumerate() {
                let body = itertools::join(
                    &[
                        format!(
                            r#"<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
  <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  <meta http-equiv="Content-Style-Type" content="text/css" />
  <meta name="generator" content="Rust EPUB library" />
  <title>{}</title>
  <link rel="stylesheet" type="text/css" href="stylesheet.css" />
</head>
<body>"#,
                            // The title is scraped text; a raw `&` or `<` would make the
                            // chapter document malformed XHTML.
                            // NOTE(review): assumes the scraper yields plain text, not
                            // already-escaped markup — confirm.
                            escape_xml_text(&chapter.name)
                        ),
                        markdown_to_html(&chapter.pre, &options),
                        markdown_to_html(&chapter.main, &options),
                        markdown_to_html(&chapter.post, &options),
                        String::from("</body></html>"),
                    ],
                    "<br/><hr/><br/>",
                );

                epub.add_content(
                    EpubContent::new(format!("chapter-{}.xhtml", i + 1), body.as_bytes())
                        .title(&chapter.name),
                )?;
            }

            epub.inline_toc();

            epub.generate(&mut buffer)?;
        }
        Output::Json => {
            serde_json::to_writer(&mut buffer, &story)?;
        }
        Output::MessagePack => {
            rmps::encode::write(&mut buffer, &story)?;
        }
    }

    // Story names and author names come from the scraped page and may contain `/`
    // or other characters that would turn the name into a path or make it invalid.
    let file_name = sanitize_file_name(&format!(
        "{} - {}.{}",
        story.name,
        story.authors.join(", "),
        output.file_extension()
    ));

    fs::write(file_name, &buffer)?;

    Ok(())
}

/// Escapes `&`, `<` and `>` so `text` can be embedded as XML character data.
fn escape_xml_text(text: &str) -> String {
    let mut escaped = String::with_capacity(text.len());

    for c in text.chars() {
        match c {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            other => escaped.push(other),
        }
    }

    escaped
}

/// Replaces path separators, characters reserved on common file systems and control
/// characters with `_` so `name` is usable as a single file name.
fn sanitize_file_name(name: &str) -> String {
    name.chars()
        .map(|c| match c {
            '/' | '\\' | ':' | '*' | '?' | '"' | '<' | '>' | '|' => '_',
            c if c.is_control() => '_',
            c => c,
        })
        .collect()
}

/// Story archives for which a scraper is available.
enum Site {
    ArchiveOfOurOwn,
    FanFiction,
}

impl Site {
    /// Picks the site that serves `url`, based on an exact match of its host.
    ///
    /// Returns `None` when the URL has no host or the host is not one of the
    /// recognised names.
    fn from_url(url: &Uri) -> Option<Self> {
        let site = match url.host()? {
            "archiveofourown.org" | "www.archiveofourown.org" => Site::ArchiveOfOurOwn,
            "fanfiction.net" | "www.fanfiction.net" | "m.fanfiction.net" => Site::FanFiction,
            _ => return None,
        };

        Some(site)
    }
}

/// Contents of a scraping-list file: a JSON array of [`Element`] entries.
pub type Import = Vec<Element>;

/// One entry of a scraping list.
///
/// The enum is `#[serde(untagged)]`, so an entry is either a JSON object with a
/// `url` field or a bare JSON string. Both forms carry the story URL as text.
#[derive(Debug, serde::Serialize, serde::Deserialize)]
#[serde(untagged)]
pub enum Element {
    /// Object form: `{ "url": "<address>" }`.
    Url { url: String },
    /// Bare string form: `"<address>"`.
    Text(String),
}

/// A destination for formatted log messages.
pub trait Drain {
    /// Least severe level this drain accepts; `Logger::enabled` compares record
    /// levels against it.
    fn level(&self) -> Level;
    /// NOTE(review): presumably lets a drain skip individual records; no code
    /// visible in this file calls it — confirm intended use.
    fn ignore(&self, record: &Record) -> bool;
    /// Writes one already-formatted message to the drain.
    fn write(&self, string: &str) -> std::io::Result<()>;
    /// Flushes any output the drain has buffered.
    fn flush(&self) -> std::io::Result<()>;
}

/// [`Drain`] that prints log messages to standard output.
pub struct StdoutDrain {
    level: Level,
}

impl StdoutDrain {
    /// Creates a drain that reports `level` as its level.
    ///
    /// This implementation always returns `Ok`; the signature is fallible all the same.
    pub fn new(level: Level) -> io::Result<Self> {
        let drain = StdoutDrain { level };
        Ok(drain)
    }
}

impl Drain for StdoutDrain {
    /// Level passed to [`StdoutDrain::new`].
    fn level(&self) -> Level {
        self.level
    }

    /// Never asks for a record to be skipped.
    fn ignore(&self, _record: &Record) -> bool {
        false
    }

    /// Prints `string` followed by a newline on standard output.
    fn write(&self, string: &str) -> io::Result<()> {
        let stdout = io::stdout();
        let mut handle = stdout.lock();
        writeln!(handle, "{}", string)
    }

    /// Flushes standard output.
    fn flush(&self) -> io::Result<()> {
        let mut stdout = io::stdout();
        stdout.flush()
    }
}

/// Every failure the command-line tool can report.
///
/// Each wrapping variant has a matching `From` impl so `?` converts the underlying
/// error automatically.
#[derive(Debug)]
enum Error {
    /// Standard I/O failure, e.g. reading the list file or writing the output file.
    Io { err: std::io::Error },

    /// Failure reported by the `story_dl` scrapers.
    StoryDL { err: story_dl::Error },

    /// A URL string could not be parsed.
    Uri { err: awc::http::uri::InvalidUri },

    /// EPUB generation failed.
    EPub { err: epub_builder::Error },
    /// JSON (de)serialization failed: parsing the list file or encoding a story.
    Json { err: serde_json::Error },
    /// MessagePack encoding of a story failed.
    MessagePack { err: rmps::encode::Error },

    /// The global logger could not be installed.
    Logger { err: log::SetLoggerError },
    /// The requested output format name is not one of the supported values.
    ParseOutput,
}

impl From<std::io::Error> for Error {
    fn from(err: std::io::Error) -> Error {
        Error::Io { err }
    }
}

impl From<story_dl::Error> for Error {
    fn from(err: story_dl::Error) -> Error {
        Error::StoryDL { err }
    }
}

impl From<awc::http::uri::InvalidUri> for Error {
    fn from(err: awc::http::uri::InvalidUri) -> Error {
        Error::Uri { err }
    }
}

impl From<epub_builder::Error> for Error {
    fn from(err: epub_builder::Error) -> Error {
        Error::EPub { err }
    }
}

impl From<serde_json::Error> for Error {
    fn from(err: serde_json::Error) -> Error {
        Error::Json { err }
    }
}

impl From<rmps::encode::Error> for Error {
    fn from(err: rmps::encode::Error) -> Error {
        Error::MessagePack { err }
    }
}

impl From<log::SetLoggerError> for Error {
    fn from(err: log::SetLoggerError) -> Error {
        Error::Logger { err }
    }
}

impl std::fmt::Display for Error {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Error::Io { err } => write!(f, "io error {}", err),

            Error::StoryDL { err } => write!(f, "story downloader error {}", err),

            Error::Uri { err } => write!(f, "invalid url {}", err),

            Error::EPub { err } => write!(f, "epub error {}", err),
            Error::Json { err } => write!(f, "json {}", err),
            Error::MessagePack { err } => write!(f, "message-pack error {}", err),

            Error::Logger { err } => write!(f, "log {}", err),
            Error::ParseOutput => write!(f, "valid values: [epub, json, message-pack]",),
        }
    }
}

impl std::error::Error for Error {
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        match self {
            Error::Io { ref err } => Some(err),

            Error::StoryDL { ref err } => Some(err),

            Error::Uri { ref err } => Some(err),

            Error::EPub { ref err } => Some(err),
            Error::Json { ref err } => Some(err),
            Error::MessagePack { ref err } => Some(err),

            Error::Logger { ref err } => Some(err),
            Error::ParseOutput => None,
        }
    }
}

/// `log` backend that formats records and sends them to a [`StdoutDrain`].
pub struct Logger {
    stdout: StdoutDrain,
}

impl Logger {
    /// Creates a logger whose stdout drain is built with `level`.
    ///
    /// # Errors
    ///
    /// Propagates any error from `StdoutDrain::new`.
    pub fn new(level: Level) -> io::Result<Self> {
        let stdout = StdoutDrain::new(level)?;
        Ok(Logger { stdout })
    }

    /// Renders `record` as `<local timestamp> <level, left-aligned to 5> [<target>] <message>`.
    ///
    /// The record's module path (or an empty string) stands in for an empty target.
    fn log_format(record: &Record) -> String {
        let target = match record.target() {
            "" => record.module_path().unwrap_or_default(),
            explicit => explicit,
        };

        let timestamp = Local::now().format("%Y-%m-%d %H:%M:%S,%3f");
        let level = record.level().to_string();

        format!("{} {:<5} [{}] {}", timestamp, level, target, record.args())
    }
}

impl Log for Logger {
    /// A record is enabled when it is at least as severe as the stdout drain's level.
    fn enabled(&self, metadata: &Metadata) -> bool {
        metadata.level() <= self.stdout.level()
    }

    /// Formats `record` and writes it to the stdout drain.
    ///
    /// Write failures are reported on standard error rather than propagated, because
    /// the `Log` trait gives `log` no way to return an error.
    fn log(&self, record: &Record) {
        // The `log` facade does not promise to call `enabled` first, so the drain's
        // level has to be enforced here as well.
        if !self.enabled(record.metadata()) {
            return;
        }

        let msg = Self::log_format(record);

        if let Err(err) = self.stdout.write(&msg) {
            eprintln!("Could not write to target: {}", err);
        }
    }

    /// Flushes the stdout drain, reporting a failure on standard error.
    fn flush(&self) {
        if let Err(err) = self.stdout.flush() {
            eprintln!("Could not flush drain: {}", err);
        }
    }
}