mdbook_epub/
generator.rs

1use crate::{file_io, DEFAULT_CSS};
2use crate::config::Config;
3use crate::filters::asset_link::AssetRemoteLinkFilter;
4use crate::filters::footnote::FootnoteFilter;
5use crate::filters::quote_converter::QuoteConverterFilter;
6use crate::resources::asset::Asset;
7use crate::resources::resource::{self};
8use crate::resources::retrieve::{ContentRetriever, ResourceHandler};
9use crate::validation::validate_config_epub_version;
10use crate::{Error, utils};
11use epub_builder::{EpubBuilder, EpubContent, ZipLibrary};
12use handlebars::{Handlebars, RenderError, RenderErrorReason};
13use mdbook_core::book::{BookItem, Chapter};
14use mdbook_renderer::RenderContext;
15use pulldown_cmark::html;
16use serde_json::json;
17use std::collections::HashSet;
18use std::path::Path;
19use std::{
20    collections::HashMap,
21    fmt::{self, Debug, Formatter},
22    fs::File,
23    io::{Read, Write},
24    iter,
25    path::PathBuf,
26};
27use tracing::{debug, error, info, trace, warn};
28
/// The actual EPUB book renderer.
///
/// Bundles the mdBook render context with the state needed to assemble an EPUB:
/// the archive builder, the renderer configuration, the chapter template registry,
/// the discovered assets and the backend used to retrieve asset content.
pub struct Generator<'a> {
    /// Render context handed over by mdBook (book items, book configuration, root directory).
    ctx: &'a RenderContext,
    /// Collects metadata, chapters and resources until the archive is finally written.
    builder: EpubBuilder<ZipLibrary>,
    /// Renderer settings derived from the render context.
    config: Config,
    /// Template registry; the chapter page template is registered under the name `index`.
    hbs: Handlebars<'a>,
    /// Assets found in the book, keyed by a string
    /// (presumably the original link text; TODO confirm against `resource::find`).
    assets: HashMap<String, Asset>,
    /// Backend used to read asset content and passed to the asset link filter while rendering.
    handler: Box<dyn ContentRetriever>,
}
38
39impl<'a> Generator<'a> {
40    pub fn new(ctx: &'a RenderContext) -> Result<Generator<'a>, Error> {
41        Self::new_with_handler(ctx, ResourceHandler)
42    }
43
44    fn new_with_handler(
45        ctx: &'a RenderContext,
46        handler: impl ContentRetriever + 'static,
47    ) -> Result<Generator<'a>, Error> {
48        let handler = Box::new(handler);
49        let config = Config::from_render_context(ctx)?;
50
51        let epub_version = validate_config_epub_version(&config)?;
52
53        let mut builder = EpubBuilder::new(ZipLibrary::new()?)?;
54        if let Some(version) = epub_version {
55            builder.epub_version(version);
56        }
57
58        let mut hbs = Handlebars::new();
59        hbs.register_template_string("index", config.template()?)
60            .map_err(|_| Error::TemplateParse)?;
61
62        Ok(Generator {
63            builder,
64            ctx,
65            config,
66            hbs,
67            assets: HashMap::new(),
68            handler,
69        })
70    }
71
72    fn populate_metadata(&mut self) -> Result<(), Error> {
73        info!("1. populate metadata ==");
74        self.builder.metadata("generator", "mdbook-epub")?;
75
76        if let Some(title) = self.ctx.config.book.title.clone() {
77            self.builder.metadata("title", title)?;
78        } else {
79            warn!("No `title` attribute found yet all EPUB documents should have a title");
80        }
81
82        if let Some(desc) = self.ctx.config.book.description.clone() {
83            self.builder.metadata("description", desc)?;
84        }
85
86        if !self.ctx.config.book.authors.is_empty() {
87            self.builder
88                .metadata("author", self.ctx.config.book.authors.join(", "))?;
89        }
90
91        self.builder.metadata("generator", env!("CARGO_PKG_NAME"))?;
92
93        if let Some(lang) = self.ctx.config.book.language.clone() {
94            self.builder.metadata("lang", lang)?;
95        } else {
96            self.builder.metadata("lang", "en")?;
97        }
98
99        Ok(())
100    }
101
    /// Run the whole rendering pipeline and write the finished EPUB to `writer`.
    ///
    /// Consumes the generator. The steps run in this order: metadata, asset
    /// discovery, chapters, cover image, stylesheet, discovered assets,
    /// additional resources, and finally the archive itself.
    ///
    /// # Errors
    /// Returns the first error raised by any step; the remaining steps are not run.
    pub fn generate<W: Write>(mut self, writer: W) -> Result<(), Error> {
        info!("Generating the EPUB book");

        self.populate_metadata()?;
        self.find_assets()?;
        self.generate_chapters()?;

        self.add_cover_image()?;
        self.embed_stylesheets()?;
        self.additional_assets()?;
        self.additional_resources()?;
        info!("8. final generation ==");
        // Everything collected so far is serialized by the builder into `writer`.
        self.builder.generate(writer)?;
        info!("Generating the EPUB book - DONE !");
        Ok(())
    }
118
119    /// Find assets for adding to the document later. For remote linked assets, they would be
120    /// rendered differently in the document by provided information of assets.
121    fn find_assets(&mut self) -> Result<(), Error> {
122        info!("2.1 Start find_assets()...");
123        // resources::find can emit very unclear error based on internal MD content,
124        // so let's give a tip to user in error message
125        let assets = resource::find(self.ctx).map_err(|e| {
126            let error = String::from(
127                "Failed finding/fetch resource taken from content? Look up content for possible error...",
128            );
129            error!("{} Caused by: {}", error, e);
130            e
131        })?;
132        self.assets.extend(assets);
133        info!("2.2 found [{}] assets", self.assets.len());
134        Ok(())
135    }
136
137    fn generate_chapters(&mut self) -> Result<(), Error> {
138        info!("3.1 Generate chapters == ");
139
140        let mut added_count = 0;
141        // add the main chapters + sub-chapters
142        for (idx, item) in self.ctx.book.iter().enumerate() {
143            let is_first = idx == 0;
144            if let BookItem::Chapter(ref ch) = *item {
145                trace!("Adding chapter \"{}\"", ch);
146                self.add_chapter(ch, Some(is_first))?;
147                added_count += 1;
148            }
149        }
150        info!("3.2 Generate [{}] chapters == ", added_count);
151        Ok(())
152    }
153
154    fn add_chapter(&mut self, ch: &Chapter, is_first: Option<bool>) -> Result<(), Error> {
155        info!("Adding chapter = '{}'", &ch.name);
156        let rendered_result = self.render_chapter(ch);
157        // let's skip chapter without content (drafts)
158        let rendered = match rendered_result {
159            Ok(rendered_content) => rendered_content,
160            Err(error_msg) => {
161                warn!(
162                    "SKIPPED chapter '{}' due to error = {}",
163                    &ch.name, error_msg
164                );
165                return Ok(());
166            }
167        };
168
169        let content_path = ch.path.as_ref().ok_or_else(|| {
170            Error::ContentFileNotFound(format!(
171                "Content file was not found for Chapter '{}'",
172                ch.name
173            ))
174        })?;
175        trace!(
176            "add a chapter '{:?}' by a path = '{:?}'",
177            &ch.name, content_path
178        );
179        let path = content_path.with_extension("html").display().to_string();
180        let title = if self.config.no_section_label {
181            ch.name.clone()
182        } else if let Some(ref section_number) = ch.number {
183            format! {"{} {}", section_number, ch.name}
184        } else {
185            ch.name.clone()
186        };
187
188        // If this is the first chapter, mark its type as Text (i.e. "bodymatter") for render_nav().
189        // This ensures at least one item in the nav.xhtml <nav epub:type="landmarks"><ol> list,
190        // otherwise epubcheck shows an error.
191        let mut content = match is_first {
192            Some(true) => EpubContent::new(path, rendered.as_bytes())
193                .title(title)
194                .reftype(epub_builder::ReferenceType::Text),
195            _ => EpubContent::new(path, rendered.as_bytes()).title(title),
196        };
197
198        let level = ch.number.as_ref().map(|n| n.len() as i32 - 1).unwrap_or(0);
199        content = content.level(level);
200
201        self.builder.add_content(content)?;
202
203        Ok(())
204    }
205
206    /// Render the chapter into its fully formed HTML representation.
207    fn render_chapter(&mut self, ch: &Chapter) -> Result<String, RenderError> {
208        let chapter_dir = if let Some(chapter_file_path) = &ch.path {
209            chapter_file_path.parent().ok_or_else(|| {
210                RenderError::from(RenderErrorReason::Other(format!(
211                    "No CSS found by a path = {:?}",
212                    ch.path
213                )))
214            })?
215        } else {
216            return Err(RenderError::from(RenderErrorReason::Other(format!(
217                "Draft chapter: '{}' could not be rendered.",
218                ch.name
219            ))));
220        };
221
222        let mut body = String::with_capacity(3000); // big enough arbitrary size
223
224        let parser = utils::create_new_pull_down_parser(&ch.content);
225        let mut quote_converter = QuoteConverterFilter::new(self.config.curly_quotes);
226        let ch_depth = chapter_dir.components().count();
227
228        debug!("There are = {:?}", self.assets);
229        let mut asset_link_filter =
230            AssetRemoteLinkFilter::new(&mut self.assets, ch_depth, &*self.handler);
231
232        let mut footnote_filter =
233            if self.config.epub_version == Some(3) && self.config.footnote_backrefs {
234                FootnoteFilter::new(self.config.footnote_backrefs)
235            } else {
236                FootnoteFilter::new(false)
237            };
238
239        let events = parser
240            .map(|event| quote_converter.apply(event))
241            .map(|event| asset_link_filter.apply(event))
242            .filter_map(|event| footnote_filter.apply(event));
243
244        trace!("Found Rendering events map = [{:?}]", &events);
245
246        html::push_html(&mut body, events);
247
248        if !footnote_filter.is_empty() {
249            footnote_filter.retain();
250            footnote_filter.sort_by_cached_key();
251            body.push_str("<div class=\"footnotes\" epub:type=\"footnotes\">\n");
252            let events = footnote_filter.get_events();
253            html::push_html(&mut body, events);
254            body.push_str("</div>\n");
255        }
256
257        trace!("Chapter content after Events processing = [{:?}]", body);
258
259        let stylesheet_path = chapter_dir
260            .components()
261            .map(|_| "..")
262            .chain(iter::once("stylesheet.css"))
263            .collect::<Vec<_>>()
264            .join("/");
265
266        let epub_version_3 = self.config.epub_version == Some(3);
267
268        let ctx = json!({
269            "epub_version_3": epub_version_3,
270            "title": ch.name,
271            "body": body,
272            "stylesheet": stylesheet_path
273        });
274
275        self.hbs.render("index", &ctx)
276    }
277
278    /// Generate the stylesheet and add it to the document.
279    fn embed_stylesheets(&mut self) -> Result<(), Error> {
280        info!("5. Embedding stylesheets ==");
281
282        let stylesheet = self.generate_stylesheet()?;
283        self.builder.stylesheet(stylesheet.as_slice())?;
284
285        Ok(())
286    }
287
288    fn additional_assets(&mut self) -> Result<(), Error> {
289        info!(
290            "6. Embedding, downloading additional assets == [{:?}]",
291            self.assets.len()
292        );
293
294        let mut unique_assets = HashSet::new();
295        let mut count = 0;
296        for (_key, asset) in self.assets.iter_mut() {
297            // self.handler.download(asset)?;
298            debug!("Try to add asset : {}", asset);
299            if unique_assets.insert(&asset.location_on_disk) {
300                let mut content = Vec::new();
301                debug!("Read (EARLIER downloaded?) asset from disk : {}", asset);
302                self.handler.read(&asset.location_on_disk, &mut content)?;
303                let mt = asset.mimetype.to_string();
304                self.builder.add_resource(&asset.filename, &*content, mt)?;
305                count += 1;
306            }
307        }
308        debug!("Embedded '{}' additional assets", count);
309        Ok(())
310    }
311
312    fn additional_resources(&mut self) -> Result<(), Error> {
313        info!("7. Embedding additional resources ==");
314
315        let mut count = 0;
316        for path in self.config.additional_resources.iter() {
317            debug!("Embedding resource: {:?}", path);
318
319            let full_path = self.resolve_path(path)?;
320            let mt = mime_guess::from_path(&full_path).first_or_octet_stream();
321
322            let content = file_io(File::open(&full_path), "add-resource", &full_path)?;
323            debug!(
324                "Adding resource [{}]: {:?} / {:?} ",
325                count,
326                path,
327                mt.to_string()
328            );
329            self.builder.add_resource(path, content, mt.to_string())?;
330            count += 1;
331        }
332        debug!("Embedded '{}' additional resources", count);
333        Ok(())
334    }
335
336    fn add_cover_image(&mut self) -> Result<(), Error> {
337        info!("4. Adding cover image ==");
338
339        if let Some(ref path) = self.config.cover_image {
340            let full_path = self.resolve_path(path)?;
341            let mt = mime_guess::from_path(&full_path).first_or_octet_stream();
342
343            let content = file_io(File::open(&full_path), "add-cover-image", &full_path)?;
344            debug!("Adding cover image: {:?} / {:?} ", path, mt.to_string());
345            self.builder
346                .add_cover_image(path, content, mt.to_string())?;
347        }
348
349        Ok(())
350    }
351
352    /// Concatenate all provided stylesheets into one long stylesheet.
353    fn generate_stylesheet(&self) -> Result<Vec<u8>, Error> {
354        let mut stylesheet = Vec::new();
355
356        if self.config.use_default_css {
357            stylesheet.extend(DEFAULT_CSS.as_bytes());
358        }
359
360        for additional_css in &self.config.additional_css {
361            debug!("generating stylesheet: {:?}", &additional_css);
362            let full_path = self.resolve_path(additional_css)?;
363            let mut f = file_io(File::open(&full_path), "open-stylesheet", &full_path)?;
364            file_io(f.read_to_end(&mut stylesheet), "read-stylesheet", additional_css)?;
365        }
366        debug!("found style(s) = [{}]", stylesheet.len());
367        Ok(stylesheet)
368    }
369
370    fn resolve_path(&self, path: &Path) -> Result<PathBuf, Error> {
371        // Try direct canonicalization first
372        if let Ok(resolved) = path.canonicalize() {
373            return Ok(resolved);
374        }
375
376        // Try with book source directory
377        let with_src = self.ctx.root.join(&self.ctx.config.book.src).join(path);
378
379        if let Ok(resolved) = with_src.canonicalize() {
380            return Ok(resolved);
381        }
382
383        // Try with root directory
384        let with_root = self.ctx.root.join(path);
385        with_root
386            .canonicalize()
387            .map_err(|_| Error::ResourceNotFound(path.to_path_buf()))
388    }
389}
390
391impl Debug for Generator<'_> {
392    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
393        f.debug_struct("Generator")
394            .field("ctx", &self.ctx)
395            .field("builder", &self.builder)
396            .field("config", &self.config)
397            .field("assets", &self.assets.keys())
398            .finish()
399    }
400}
401
402#[cfg(test)]
403mod tests {
404    use super::*;
405    use crate::resources::asset::AssetKind;
406    use crate::resources::retrieve::{MockContentRetriever, RetrievedContent, UpdatedAssetData};
407    use mime_guess::mime;
408    use std::io::Cursor;
409    use std::path::Path;
410    use tempfile::TempDir;
411    use url::Url;
412    use crate::init_tracing;
413
    use std::sync::Once;
    /// Ensures the tracing setup below runs at most once for the whole test binary.
    static INIT: Once = Once::new();
    /// Initialise tracing for the tests; safe to call from every test.
    pub fn init_logging() {
        INIT.call_once(|| {
            // The result of the initialisation is deliberately ignored.
            let _ = init_tracing();
        });
    }
421
422    #[test]
423    fn test_load_assets() {
424        init_logging();
425        let png = "rust-logo.png";
426        let svg = "rust-logo.svg";
427        let url = "https://www.rust-lang.org/static/images/rust-logo-blk.svg";
428        let content = format!(
429            "# Chapter 1\n\n\
430            ![Rust Logo]({png})\n\n\
431            ![Rust Logo remote]({url})\n\n\
432            <img alt=\"Rust Logo in html\" src=\"{svg}\" />\n"
433        );
434        let tmp_dir = TempDir::new().unwrap();
435        let destination = tmp_dir.path().join("mdbook-epub");
436        let json = ctx_with_template(&content, "src", destination.as_path()).to_string();
437        let ctx = RenderContext::from_json(json.as_bytes()).unwrap();
438
439        let mut mock_client = MockContentRetriever::new();
440        // mock_client.expect_download().times(3).returning(|_| Ok(()));
441        mock_client
442            .expect_download()
443            .times(0)
444            .returning(|_| Ok(UpdatedAssetData::default()));
445        // checks local path of assets
446        let book_source = PathBuf::from(&ctx.root)
447            .join(&ctx.config.book.src)
448            .canonicalize()
449            .expect(
450                format!(
451                    "book source root is not found: {}",
452                    &ctx.config.book.src.display()
453                )
454                .as_str(),
455            );
456        let should_be_png = book_source.join(png);
457        let should_be_svg = book_source.join(svg);
458        let hashed_filename = utils::hash_link(&url.parse::<Url>().unwrap());
459        let should_be_url = destination.as_path().join(hashed_filename);
460        for should_be in [should_be_svg, should_be_png, should_be_url] {
461            mock_client
462                .expect_read()
463                .times(1)
464                .withf(move |path, _| path == should_be)
465                .returning(|_, _| Ok(()));
466        }
467
468        let mut g = Generator::new_with_handler(&ctx, mock_client).unwrap();
469        g.find_assets().unwrap();
470        assert_eq!(g.assets.len(), 3);
471        g.additional_assets().unwrap();
472    }
473
    /// Runs the asset link filter over a small document and checks the HTML:
    /// a link without a registered asset and a local image keep their original
    /// target, while the remote image source is replaced by the hashed file name.
    #[test]
    fn test_render_assets() {
        init_logging();
        let links = [
            "local.webp",
            "http://server/remote.svg",
            "http://server/link.png",
        ];
        let tmp_dir = TempDir::new().unwrap();
        let root = tmp_dir.path().join("mdbook-epub");
        let mut assets = HashMap::new();
        // Register the local image (links[0]) as a local asset.
        let original_link = links[0].to_string();
        assets.insert(
            original_link.clone(),
            Asset {
                original_link,
                location_on_disk: root.as_path().join("src").join(links[0]),
                filename: PathBuf::from(links[0]),
                mimetype: "image/webp".parse::<mime::Mime>().unwrap(),
                source: AssetKind::Local(PathBuf::from(links[0])),
            },
        );
        // Register the remote image (links[1]) under a file name derived from its URL hash.
        let url = Url::parse(links[1]).unwrap();
        let hashed_filename = utils::hash_link(&url);
        let hashed_path = Path::new("cache").join(&hashed_filename);
        let original_link = links[1].to_string();
        assets.insert(
            original_link.clone(),
            Asset {
                original_link,
                location_on_disk: root.as_path().join("book").join(&hashed_path),
                filename: hashed_path,
                mimetype: "image/svg+xml".parse::<mime::Mime>().unwrap(),
                source: AssetKind::Remote(url),
            },
        );
        // links[2] is deliberately not registered as an asset.
        let markdown_str = format!(
            "Chapter 1\n\
            =====\n\n\
            * [link]({})\n\
            * ![Local Image]({})\n\
            * <img alt=\"Remote Image\" src=\"{}\" >\n",
            links[2], links[0], links[1]
        );

        // Minimal retriever for this test: returns canned data instead of touching the network.
        struct TestHandler;
        impl ContentRetriever for TestHandler {
            fn download(&self, asset: &Asset) -> Result<UpdatedAssetData, Error> {
                Ok(UpdatedAssetData {
                    mimetype: asset.mimetype.clone(),
                    filename: PathBuf::from("78221e8d16c52ea3.svg"),
                    location_on_disk: asset.location_on_disk.clone(),
                })
            }
            fn retrieve(&self, _url: &str) -> Result<RetrievedContent, Error> {
                trace!("retrieve by {_url}");
                let content = "Downloaded content".as_bytes();
                Ok(RetrievedContent::new(
                    Box::new(Cursor::new(content)),
                    "image/svg+xml".to_string(),
                    "svg".to_string(),
                    Some(content.len() as u64),
                ))
            }
        }
        let test_content_retriever = TestHandler {};

        // Depth 0 is passed to the filter for this test.
        let mut filter = AssetRemoteLinkFilter::new(&mut assets, 0, &test_content_retriever);
        let parser = utils::create_new_pull_down_parser(&markdown_str);
        let events = parser.map(|ev| filter.apply(ev));
        trace!("Events = {:?}", events);
        let mut html_buf = String::new();
        html::push_html(&mut html_buf, events);
        trace!("html_buf = {:?}", html_buf);

        // Only the remote image source is expected to change (to the hashed file name).
        assert_eq!(
            html_buf,
            format!(
                "<h1>Chapter 1</h1>\n\
                <ul>\n\
                <li><a href=\"{}\">link</a></li>\n\
                <li><img src=\"{}\" alt=\"Local Image\" /></li>\n\
                <li><img alt=\"Remote Image\" src=\"{}\" >\n\
                </li>\n\
                </ul>\n",
                links[2], links[0], hashed_filename
            )
        );
    }
563
564    #[test]
565    fn test_render_remote_assets_in_sub_chapter() {
566        init_logging();
567        let link = "https://upload.wikimedia.org/wikipedia/commons/4/4e/Open_Source_Initiative_keyhole.svg";
568        let tmp_dir = TempDir::new().unwrap();
569        let dest_dir = tmp_dir.path().join("mdbook-epub");
570        let ch1_1 = json!({
571            "Chapter": {
572                "name": "subchapter",
573                "content": format!("# Subchapter\n\n![Image]({link})"),
574                "number": [1,1],
575                "sub_items": [],
576                "path": "chapter_1/subchapter.md",
577                "parent_names": ["Chapter 1"]
578            }
579        });
580        let ch1 = json!({
581            "Chapter": {
582                "name": "Chapter 1",
583                "content": format!("# Chapter 1\n\n![Image]({link})"),
584                "number": [1],
585                "sub_items": [ch1_1],
586                "path": "chapter_1/index.md",
587                "parent_names": []
588            }
589        });
590        let ch2 = json!({
591            "Chapter": {
592                "name": "Chapter 2",
593                "content": format!("# Chapter 2\n\n![Image]({link})"),
594                "number": [2],
595                "sub_items": [],
596                "path": "chapter_2.md",
597                "parent_names": []
598            }
599        });
600        let mut json = ctx_with_template("", "src", dest_dir.as_path());
601        let chvalue = json["book"]["items"].as_array_mut().unwrap();
602        chvalue.clear();
603        chvalue.push(ch1);
604        chvalue.push(ch2);
605
606        let ctx = RenderContext::from_json(json.to_string().as_bytes()).unwrap();
607        let mut g = Generator::new(&ctx).unwrap();
608        g.find_assets().unwrap();
609        assert_eq!(g.assets.len(), 1);
610
611        let pat = |heading, prefix| {
612            format!("<h1>{heading}</h1>\n<p><img src=\"{prefix}e3825a3756080f55.svg\"")
613        };
614        if let BookItem::Chapter(ref ch) = ctx.book.items[0] {
615            let rendered: String = g.render_chapter(ch).unwrap();
616            debug!("1. rendered ===\n{}", &rendered);
617            assert!(rendered.contains(&pat("Chapter 1", "../")));
618
619            if let BookItem::Chapter(ref sub_ch) = ch.sub_items[0] {
620                let sub_rendered = g.render_chapter(sub_ch).unwrap();
621                debug!("2. rendered ===\n{}", &sub_rendered);
622                assert!(sub_rendered.contains(&pat("Subchapter", "../")));
623            } else {
624                panic!();
625            }
626        } else {
627            panic!();
628        }
629        if let BookItem::Chapter(ref ch) = ctx.book.items[1] {
630            let rendered: String = g.render_chapter(ch).unwrap();
631            assert!(rendered.contains(&pat("Chapter 2", "")));
632        } else {
633            panic!();
634        }
635    }
636
637    #[test]
638    #[should_panic]
639    fn test_find_assets_with_wrong_src_dir() {
640        init_logging();
641        let tmp_dir = TempDir::new().unwrap();
642        let json = ctx_with_template(
643            "# Chapter 1\n\n",
644            "nosuchsrc",
645            tmp_dir.path().join("mdbook-epub").as_path(),
646        )
647        .to_string();
648        let ctx = RenderContext::from_json(json.as_bytes()).unwrap();
649        let mut g = Generator::new(&ctx).unwrap();
650        g.find_assets().unwrap();
651    }
652
    /// Build the JSON form of a render context for the tests.
    ///
    /// The book contains a single chapter named "Chapter 1" (path
    /// `chapter_1.md`) whose markdown is `content`. `source` becomes the book's
    /// `src` directory and `destination` the render destination. The root is
    /// fixed to `tests/long_book_example` and curly quotes are enabled for the
    /// EPUB output.
    fn ctx_with_template(content: &str, source: &str, destination: &Path) -> serde_json::Value {
        json!({
            "version": mdbook_core::MDBOOK_VERSION,
            "root": "tests/long_book_example",
            "book": {"items": [{
                "Chapter": {
                    "name": "Chapter 1",
                    "content": content,
                    "number": [1],
                    "sub_items": [],
                    "path": "chapter_1.md",
                    "parent_names": []
                }}], "__non_exhaustive": null},
            "config": {
                "book": {"authors": [], "language": "en", "text-direction": "ltr",
                    "src": source, "title": "DummyBook"},
                "output": {"epub": {"curly-quotes": true}}},
            "destination": destination
        })
    }
673}