html_to_epub/cmd/
mod.rs

1use std::{env, fs};
2use std::error::Error;
3use std::fs::File;
4use std::io;
5use std::path::Path;
6use std::time::Duration;
7
8use epub_builder::{EpubBuilder, EpubContent, ReferenceType};
9use epub_builder::ZipLibrary;
10use futures::executor::block_on;
11use futures::future;
12use log::info;
13use reqwest::Response;
14use visdom::types::Elements;
15use visdom::Vis;
16
/// Book-level settings used when building the EPUB.
///
/// Every field borrows from the caller, so the value is cheap to copy around.
#[derive(Debug, Clone, Copy)]
pub struct HtmlToEpubOption<'a> {
    /// Raw bytes of the cover image. The converter embeds them as `cover.png`
    /// with MIME type `image/png`.
    pub cover: &'a [u8],
    /// Book title, written to the EPUB `title` metadata.
    pub title: &'a str,
    /// Book author, written to the EPUB `author` metadata.
    pub author: &'a str,
    /// Path of the EPUB file to create.
    pub output: &'a str,
}
23
24pub struct HtmlToEpub<'a> {
25    html: &'a Vec<String>,
26    option: HtmlToEpubOption<'a>,
27    epub: EpubBuilder<ZipLibrary>,
28}
29
30impl<'a> HtmlToEpub<'a> {
31    pub fn new(html: &'a Vec<String>, option: HtmlToEpubOption<'a>) -> Self {
32        Self {
33            html,
34            option,
35            epub: EpubBuilder::new(ZipLibrary::new().unwrap()).unwrap(),
36        }
37    }
38
39    pub fn run(&mut self) -> Result<(), Box<dyn std::error::Error + 'static>> {
40        self.make_book()?;
41
42        for (i, html) in self.html.iter().enumerate() {
43            info!("process {}", html);
44            self.add_html(&format!("section{}.xhtml", i), html)?;
45        }
46
47        let mut output = File::create(self.option.output)?;
48
49        self.epub.generate(&mut output)?;
50
51        Ok(())
52    }
53
54    fn make_book(&mut self) -> epub_builder::Result<()> {
55        self.epub.metadata("author", self.option.author)?;
56        self.epub.metadata("title", self.option.title)?;
57        self.epub.add_cover_image("cover.png", self.option.cover, "image/png")?;
58        Ok(())
59    }
60
61    fn add_html(&mut self, name: &str, html: &str) -> Result<(), Box<dyn Error + 'static>> {
62        let data = fs::read_to_string(html)?;
63
64        let doc = Vis::load(&data).unwrap();
65
66        self.save_images(&doc);
67
68        let title_node = doc.find("title");
69        let title = title_node.text();
70        let body = Self::gen_xhtml(doc);
71        let content = EpubContent::new(name, body.as_bytes())
72            .title(title)
73            .reftype(ReferenceType::Text);
74
75        self.epub.add_content(content)?;
76
77        Ok(())
78    }
79
80    fn gen_xhtml(doc: Elements) -> String {
81        doc.find("html").set_attr("xmlns", Option::from("http://www.w3.org/1999/xhtml"));
82
83        let xhtml = r#"<?xml version="1.0" encoding="UTF-8"?>
84<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
85"#.to_string() + &doc.outer_html();
86
87        return xhtml.to_owned();
88    }
89
90    fn save_images(&mut self, doc: &Elements) {
91        let mut dls: Vec<(String, String)> = Vec::new();
92
93        doc.find("img").each(|i, e| {
94            if let Some(src) = e.get_attribute("src") {
95                fs::create_dir_all("image").unwrap();
96                let mut save = format!("image/{}", i);
97                if let Some(ext) = Path::new(&src.to_string()).extension() {
98                    save = format!("{}.{}", save, ext.to_str().unwrap());
99                }
100                e.set_attribute("src", Option::Some(&save));
101                dls.push((src.to_string(), save));
102            }
103            true
104        });
105
106        self.download_urls(dls);
107
108        doc.find("img").each(|_i, e| {
109            if let Some(src) = e.get_attribute("src") {
110                let path = src.to_string();
111                self.epub.add_resource(&path, fs::File::open(&path).unwrap(), "image/jpeg").unwrap();
112            }
113            true
114        });
115    }
116
117    fn download_urls(&self, mut urls: Vec<(String, String)>) {
118        while !urls.is_empty() {
119            let mut list = Vec::new();
120            for _ in 0..3 {
121                if urls.is_empty() {
122                    break;
123                }
124                let (url, save) = urls.remove(0);
125                info!("saving {} as {}", url, save);
126                list.push(Self::download(url, save));
127            }
128            block_on(future::join_all(list));
129        }
130    }
131
132    async fn download(url: String, target: String) -> Result<(), Box<dyn Error>> {
133        if let Ok(mut fd) = File::create(target) {
134            let resp = Self::do_get(&url).await?;
135            let bytes = resp.bytes().await?;
136            io::copy(&mut bytes.as_ref(), &mut fd)?;
137        }
138        Ok(())
139    }
140
141    async fn do_get(url: &str) -> Result<Response, reqwest::Error> {
142        let mut builder = reqwest::Client::builder().timeout(Duration::from_secs(120));
143        if let Ok(http_proxy) = env::var("http_proxy") {
144            builder = builder.proxy(reqwest::Proxy::all(http_proxy)?);
145        }
146        builder.build()?.get(url)
147            .header("user-agent", USER_AGENT)
148            .timeout(Duration::new(120, 0))
149            .send().await
150    }
151}
152
/// Value sent in the `user-agent` header of every image download request.
const USER_AGENT: &str = concat!(
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:93.0) ",
    "Gecko/20100101 Firefox/93.0",
);