1use std::{env, fs};
2use std::error::Error;
3use std::fs::File;
4use std::io;
5use std::path::Path;
6use std::time::Duration;
7
8use epub_builder::{EpubBuilder, EpubContent, ReferenceType};
9use epub_builder::ZipLibrary;
10use futures::executor::block_on;
11use futures::future;
12use log::info;
13use reqwest::Response;
14use visdom::types::Elements;
15use visdom::Vis;
16
/// User-supplied settings for a single HTML-to-EPUB conversion.
///
/// Every field borrows from the caller, so the value is cheap to copy around.
#[derive(Debug, Clone, Copy)]
pub struct HtmlToEpubOption<'a> {
    /// Raw bytes of the cover image; the converter registers it as `cover.png`.
    pub cover: &'a [u8],
    /// Book title written to the EPUB metadata.
    pub title: &'a str,
    /// Author name written to the EPUB metadata.
    pub author: &'a str,
    /// Path of the EPUB file to create.
    pub output: &'a str,
}
23
24pub struct HtmlToEpub<'a> {
25 html: &'a Vec<String>,
26 option: HtmlToEpubOption<'a>,
27 epub: EpubBuilder<ZipLibrary>,
28}
29
30impl<'a> HtmlToEpub<'a> {
31 pub fn new(html: &'a Vec<String>, option: HtmlToEpubOption<'a>) -> Self {
32 Self {
33 html,
34 option,
35 epub: EpubBuilder::new(ZipLibrary::new().unwrap()).unwrap(),
36 }
37 }
38
39 pub fn run(&mut self) -> Result<(), Box<dyn std::error::Error + 'static>> {
40 self.make_book()?;
41
42 for (i, html) in self.html.iter().enumerate() {
43 info!("process {}", html);
44 self.add_html(&format!("section{}.xhtml", i), html)?;
45 }
46
47 let mut output = File::create(self.option.output)?;
48
49 self.epub.generate(&mut output)?;
50
51 Ok(())
52 }
53
54 fn make_book(&mut self) -> epub_builder::Result<()> {
55 self.epub.metadata("author", self.option.author)?;
56 self.epub.metadata("title", self.option.title)?;
57 self.epub.add_cover_image("cover.png", self.option.cover, "image/png")?;
58 Ok(())
59 }
60
61 fn add_html(&mut self, name: &str, html: &str) -> Result<(), Box<dyn Error + 'static>> {
62 let data = fs::read_to_string(html)?;
63
64 let doc = Vis::load(&data).unwrap();
65
66 self.save_images(&doc);
67
68 let title_node = doc.find("title");
69 let title = title_node.text();
70 let body = Self::gen_xhtml(doc);
71 let content = EpubContent::new(name, body.as_bytes())
72 .title(title)
73 .reftype(ReferenceType::Text);
74
75 self.epub.add_content(content)?;
76
77 Ok(())
78 }
79
80 fn gen_xhtml(doc: Elements) -> String {
81 doc.find("html").set_attr("xmlns", Option::from("http://www.w3.org/1999/xhtml"));
82
83 let xhtml = r#"<?xml version="1.0" encoding="UTF-8"?>
84<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
85"#.to_string() + &doc.outer_html();
86
87 return xhtml.to_owned();
88 }
89
90 fn save_images(&mut self, doc: &Elements) {
91 let mut dls: Vec<(String, String)> = Vec::new();
92
93 doc.find("img").each(|i, e| {
94 if let Some(src) = e.get_attribute("src") {
95 fs::create_dir_all("image").unwrap();
96 let mut save = format!("image/{}", i);
97 if let Some(ext) = Path::new(&src.to_string()).extension() {
98 save = format!("{}.{}", save, ext.to_str().unwrap());
99 }
100 e.set_attribute("src", Option::Some(&save));
101 dls.push((src.to_string(), save));
102 }
103 true
104 });
105
106 self.download_urls(dls);
107
108 doc.find("img").each(|_i, e| {
109 if let Some(src) = e.get_attribute("src") {
110 let path = src.to_string();
111 self.epub.add_resource(&path, fs::File::open(&path).unwrap(), "image/jpeg").unwrap();
112 }
113 true
114 });
115 }
116
117 fn download_urls(&self, mut urls: Vec<(String, String)>) {
118 while !urls.is_empty() {
119 let mut list = Vec::new();
120 for _ in 0..3 {
121 if urls.is_empty() {
122 break;
123 }
124 let (url, save) = urls.remove(0);
125 info!("saving {} as {}", url, save);
126 list.push(Self::download(url, save));
127 }
128 block_on(future::join_all(list));
129 }
130 }
131
132 async fn download(url: String, target: String) -> Result<(), Box<dyn Error>> {
133 if let Ok(mut fd) = File::create(target) {
134 let resp = Self::do_get(&url).await?;
135 let bytes = resp.bytes().await?;
136 io::copy(&mut bytes.as_ref(), &mut fd)?;
137 }
138 Ok(())
139 }
140
141 async fn do_get(url: &str) -> Result<Response, reqwest::Error> {
142 let mut builder = reqwest::Client::builder().timeout(Duration::from_secs(120));
143 if let Ok(http_proxy) = env::var("http_proxy") {
144 builder = builder.proxy(reqwest::Proxy::all(http_proxy)?);
145 }
146 builder.build()?.get(url)
147 .header("user-agent", USER_AGENT)
148 .timeout(Duration::new(120, 0))
149 .send().await
150 }
151}
152
/// `User-Agent` header value sent with every HTTP request (a desktop Firefox string).
const USER_AGENT: &str = concat!(
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:93.0) ",
    "Gecko/20100101 Firefox/93.0",
);