mdbook_epub/
generator.rs

1use crate::DEFAULT_CSS;
2use crate::config::Config;
3use crate::filters::asset_link::AssetRemoteLinkFilter;
4use crate::filters::footnote::FootnoteFilter;
5use crate::filters::quote_converter::QuoteConverterFilter;
6use crate::resources::asset::Asset;
7use crate::resources::resource::{self};
8use crate::resources::retrieve::{ContentRetriever, ResourceHandler};
9use crate::validation::validate_config_epub_version;
10use crate::{Error, utils};
11use epub_builder::{EpubBuilder, EpubContent, ZipLibrary};
12use handlebars::{Handlebars, RenderError, RenderErrorReason};
13use mdbook::book::{BookItem, Chapter};
14use mdbook::renderer::RenderContext;
15use pulldown_cmark::html;
16use std::collections::HashSet;
17use std::path::Path;
18use std::{
19    collections::HashMap,
20    fmt::{self, Debug, Formatter},
21    fs::File,
22    io::{Read, Write},
23    iter,
24    path::PathBuf,
25};
26
27/// The actual EPUB book renderer.
28pub struct Generator<'a> {
29    ctx: &'a RenderContext,
30    builder: EpubBuilder<ZipLibrary>,
31    config: Config,
32    hbs: Handlebars<'a>,
33    assets: HashMap<String, Asset>,
34    handler: Box<dyn ContentRetriever>,
35}
36
37impl<'a> Generator<'a> {
38    pub fn new(ctx: &'a RenderContext) -> Result<Generator<'a>, Error> {
39        Self::new_with_handler(ctx, ResourceHandler)
40    }
41
42    fn new_with_handler(
43        ctx: &'a RenderContext,
44        handler: impl ContentRetriever + 'static,
45    ) -> Result<Generator<'a>, Error> {
46        let handler = Box::new(handler);
47        let config = Config::from_render_context(ctx)?;
48
49        let epub_version = validate_config_epub_version(&config)?;
50
51        let mut builder = EpubBuilder::new(ZipLibrary::new()?)?;
52        if let Some(version) = epub_version {
53            builder.epub_version(version);
54        }
55
56        let mut hbs = Handlebars::new();
57        hbs.register_template_string("index", config.template()?)
58            .map_err(|_| Error::TemplateParse)?;
59
60        Ok(Generator {
61            builder,
62            ctx,
63            config,
64            hbs,
65            assets: HashMap::new(),
66            handler,
67        })
68    }
69
70    fn populate_metadata(&mut self) -> Result<(), Error> {
71        info!("1. populate metadata ==");
72        self.builder.metadata("generator", "mdbook-epub")?;
73
74        if let Some(title) = self.ctx.config.book.title.clone() {
75            self.builder.metadata("title", title)?;
76        } else {
77            warn!("No `title` attribute found yet all EPUB documents should have a title");
78        }
79
80        if let Some(desc) = self.ctx.config.book.description.clone() {
81            self.builder.metadata("description", desc)?;
82        }
83
84        if !self.ctx.config.book.authors.is_empty() {
85            self.builder
86                .metadata("author", self.ctx.config.book.authors.join(", "))?;
87        }
88
89        self.builder.metadata("generator", env!("CARGO_PKG_NAME"))?;
90
91        if let Some(lang) = self.ctx.config.book.language.clone() {
92            self.builder.metadata("lang", lang)?;
93        } else {
94            self.builder.metadata("lang", "en")?;
95        }
96
97        Ok(())
98    }
99
100    pub fn generate<W: Write>(mut self, writer: W) -> Result<(), Error> {
101        info!("Generating the EPUB book");
102
103        self.populate_metadata()?;
104        self.find_assets()?;
105        self.generate_chapters()?;
106
107        self.add_cover_image()?;
108        self.embed_stylesheets()?;
109        self.additional_assets()?;
110        self.additional_resources()?;
111        info!("8. final generation ==");
112        self.builder.generate(writer)?;
113        info!("Generating the EPUB book - DONE !");
114        Ok(())
115    }
116
117    /// Find assets for adding to the document later. For remote linked assets, they would be
118    /// rendered differently in the document by provided information of assets.
119    fn find_assets(&mut self) -> Result<(), Error> {
120        info!("2.1 Start find_assets()...");
121        let error = String::from(
122            "Failed finding/fetch resource taken from content? Look up content for possible error...",
123        );
124        // resources::find can emit very unclear error based on internal MD content,
125        // so let's give a tip to user in error message
126        let assets = resource::find(self.ctx).map_err(|e| {
127            error!("{} Caused by: {}", error, e);
128            e
129        })?;
130        self.assets.extend(assets);
131        info!("2.2 found [{}] assets", self.assets.len());
132        Ok(())
133    }
134
135    fn generate_chapters(&mut self) -> Result<(), Error> {
136        info!("3.1 Generate chapters == ");
137
138        let mut added_count = 0;
139        for (idx, item) in self.ctx.book.sections.iter().enumerate() {
140            let is_first = idx == 0;
141            if let BookItem::Chapter(ref ch) = *item {
142                trace!("Adding chapter \"{}\"", ch);
143                self.add_chapter(ch, Some(is_first))?;
144                added_count += 1;
145            }
146        }
147        info!("3.2 Generate [{}] chapters == ", added_count);
148        Ok(())
149    }
150
151    fn add_chapter(&mut self, ch: &Chapter, is_first: Option<bool>) -> Result<(), Error> {
152        info!("Adding chapter = '{}'", &ch.name);
153        let rendered_result = self.render_chapter(ch);
154        // let's skip chapter without content (drafts)
155        let rendered = match rendered_result {
156            Ok(rendered_content) => rendered_content,
157            Err(error_msg) => {
158                warn!(
159                    "SKIPPED chapter '{}' due to error = {}",
160                    &ch.name, error_msg
161                );
162                return Ok(());
163            }
164        };
165
166        let content_path = ch.path.as_ref().ok_or_else(|| {
167            Error::ContentFileNotFound(format!(
168                "Content file was not found for Chapter '{}'",
169                ch.name
170            ))
171        })?;
172        trace!(
173            "add a chapter '{:?}' by a path = '{:?}'",
174            &ch.name, content_path
175        );
176        let path = content_path.with_extension("html").display().to_string();
177        let title = if self.config.no_section_label {
178            ch.name.clone()
179        } else if let Some(ref section_number) = ch.number {
180            format! {"{} {}", section_number, ch.name}
181        } else {
182            ch.name.clone()
183        };
184
185        // If this is the first chapter, mark its type as Text (i.e. "bodymatter") for render_nav().
186        // This ensures at least one item in the nav.xhtml <nav epub:type="landmarks"><ol> list,
187        // otherwise epubcheck shows an error.
188        let mut content = match is_first {
189            Some(true) => EpubContent::new(path, rendered.as_bytes())
190                .title(title)
191                .reftype(epub_builder::ReferenceType::Text),
192            _ => EpubContent::new(path, rendered.as_bytes()).title(title),
193        };
194
195        let level = ch.number.as_ref().map(|n| n.len() as i32 - 1).unwrap_or(0);
196        content = content.level(level);
197
198        self.builder.add_content(content)?;
199
200        // second pass to actually add the sub-chapters
201        for sub_item in &ch.sub_items {
202            if let BookItem::Chapter(ref sub_ch) = *sub_item {
203                trace!("add sub-item = {:?}", sub_ch.name);
204                self.add_chapter(sub_ch, None)?;
205            }
206        }
207
208        Ok(())
209    }
210
211    /// Render the chapter into its fully formed HTML representation.
212    fn render_chapter(&mut self, ch: &Chapter) -> Result<String, RenderError> {
213        let chapter_dir = if let Some(chapter_file_path) = &ch.path {
214            chapter_file_path.parent().ok_or_else(|| {
215                RenderError::from(RenderErrorReason::Other(format!(
216                    "No CSS found by a path = {:?}",
217                    ch.path
218                )))
219            })?
220        } else {
221            return Err(RenderError::from(RenderErrorReason::Other(format!(
222                "Draft chapter: '{}' could not be rendered.",
223                ch.name
224            ))));
225        };
226
227        let mut body = String::with_capacity(3000); // big enough arbitrary size
228
229        let parser = utils::create_new_pull_down_parser(&ch.content);
230        let mut quote_converter = QuoteConverterFilter::new(self.config.curly_quotes);
231        let ch_depth = chapter_dir.components().count();
232
233        debug!("There are = {:?}", self.assets);
234        let mut asset_link_filter =
235            AssetRemoteLinkFilter::new(&mut self.assets, ch_depth, &*self.handler);
236
237        let mut footnote_filter =
238            if self.config.epub_version == Some(3) && self.config.footnote_backrefs {
239                FootnoteFilter::new(self.config.footnote_backrefs)
240            } else {
241                FootnoteFilter::new(false)
242            };
243
244        let events = parser
245            .map(|event| quote_converter.apply(event))
246            .map(|event| asset_link_filter.apply(event))
247            .filter_map(|event| footnote_filter.apply(event));
248
249        trace!("Found Rendering events map = [{:?}]", &events);
250
251        html::push_html(&mut body, events);
252
253        if !footnote_filter.is_empty() {
254            footnote_filter.retain();
255            footnote_filter.sort_by_cached_key();
256            body.push_str("<div class=\"footnotes\" epub:type=\"footnotes\">\n");
257            let events = footnote_filter.get_events();
258            html::push_html(&mut body, events);
259            body.push_str("</div>\n");
260        }
261
262        trace!("Chapter content after Events processing = [{:?}]", body);
263
264        let stylesheet_path = chapter_dir
265            .components()
266            .map(|_| "..")
267            .chain(iter::once("stylesheet.css"))
268            .collect::<Vec<_>>()
269            .join("/");
270
271        let epub_version_3 = self.config.epub_version == Some(3);
272
273        let ctx = json!({
274            "epub_version_3": epub_version_3,
275            "title": ch.name,
276            "body": body,
277            "stylesheet": stylesheet_path
278        });
279
280        self.hbs.render("index", &ctx)
281    }
282
283    /// Generate the stylesheet and add it to the document.
284    fn embed_stylesheets(&mut self) -> Result<(), Error> {
285        info!("5. Embedding stylesheets ==");
286
287        let stylesheet = self.generate_stylesheet()?;
288        self.builder.stylesheet(stylesheet.as_slice())?;
289
290        Ok(())
291    }
292
293    fn additional_assets(&mut self) -> Result<(), Error> {
294        info!(
295            "6. Embedding, downloading additional assets == [{:?}]",
296            self.assets.len()
297        );
298
299        let mut unique_assets = HashSet::new();
300        let mut count = 0;
301        for (_key, asset) in self.assets.iter_mut() {
302            // self.handler.download(asset)?;
303            debug!("Try to add asset : {}", asset);
304            if unique_assets.insert(&asset.location_on_disk) {
305                let mut content = Vec::new();
306                debug!("Read (EARLIER downloaded?) asset from disk : {}", asset);
307                self.handler.read(&asset.location_on_disk, &mut content)?;
308                let mt = asset.mimetype.to_string();
309                self.builder.add_resource(&asset.filename, &*content, mt)?;
310                count += 1;
311            }
312        }
313        debug!("Embedded '{}' additional assets", count);
314        Ok(())
315    }
316
317    fn additional_resources(&mut self) -> Result<(), Error> {
318        info!("7. Embedding additional resources ==");
319
320        let mut count = 0;
321        for path in self.config.additional_resources.iter() {
322            debug!("Embedding resource: {:?}", path);
323
324            let full_path = self.resolve_path(path)?;
325            let mt = mime_guess::from_path(&full_path).first_or_octet_stream();
326
327            let content = File::open(&full_path)?;
328            debug!(
329                "Adding resource [{}]: {:?} / {:?} ",
330                count,
331                path,
332                mt.to_string()
333            );
334            self.builder.add_resource(path, content, mt.to_string())?;
335            count += 1;
336        }
337        debug!("Embedded '{}' additional resources", count);
338        Ok(())
339    }
340
341    fn add_cover_image(&mut self) -> Result<(), Error> {
342        info!("4. Adding cover image ==");
343
344        if let Some(ref path) = self.config.cover_image {
345            let full_path = self.resolve_path(path)?;
346            let mt = mime_guess::from_path(&full_path).first_or_octet_stream();
347
348            let content = File::open(&full_path)?;
349            debug!("Adding cover image: {:?} / {:?} ", path, mt.to_string());
350            self.builder
351                .add_cover_image(path, content, mt.to_string())?;
352        }
353
354        Ok(())
355    }
356
357    /// Concatenate all provided stylesheets into one long stylesheet.
358    fn generate_stylesheet(&self) -> Result<Vec<u8>, Error> {
359        let mut stylesheet = Vec::new();
360
361        if self.config.use_default_css {
362            stylesheet.extend(DEFAULT_CSS.as_bytes());
363        }
364
365        for additional_css in &self.config.additional_css {
366            debug!("generating stylesheet: {:?}", &additional_css);
367            let full_path = self.resolve_path(additional_css)?;
368            let mut f = File::open(&full_path)?;
369            f.read_to_end(&mut stylesheet)?;
370        }
371        debug!("found style(s) = [{}]", stylesheet.len());
372        Ok(stylesheet)
373    }
374
375    fn resolve_path(&self, path: &Path) -> Result<PathBuf, Error> {
376        // Try direct canonicalization first
377        if let Ok(resolved) = path.canonicalize() {
378            return Ok(resolved);
379        }
380
381        // Try with book source directory
382        let with_src = self.ctx.root.join(&self.ctx.config.book.src).join(path);
383
384        if let Ok(resolved) = with_src.canonicalize() {
385            return Ok(resolved);
386        }
387
388        // Try with root directory
389        let with_root = self.ctx.root.join(path);
390        with_root
391            .canonicalize()
392            .map_err(|_| Error::ResourceNotFound(path.to_path_buf()))
393    }
394}
395
396impl Debug for Generator<'_> {
397    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
398        f.debug_struct("Generator")
399            .field("ctx", &self.ctx)
400            .field("builder", &self.builder)
401            .field("config", &self.config)
402            .field("assets", &self.assets.keys())
403            .finish()
404    }
405}
406
#[cfg(test)]
mod tests {
    use super::*;
    use crate::resources::asset::AssetKind;
    use crate::resources::retrieve::{MockContentRetriever, RetrievedContent, UpdatedAssetData};
    use mime_guess::mime;
    use std::io::Cursor;
    use std::path::Path;
    use tempfile::TempDir;
    use url::Url;

    use std::sync::Once;
    static INIT: Once = Once::new();

    /// Initialise `env_logger` once for the whole test binary.
    pub fn init_logging() {
        INIT.call_once(|| {
            let _ = env_logger::builder().is_test(true).try_init();
        });
    }

    /// Three assets (local png, local svg referenced from raw HTML, remote
    /// svg) are discovered, and each one is read exactly once from the
    /// expected location when embedding; nothing is downloaded.
    #[test]
    fn test_load_assets() {
        init_logging();
        let png = "rust-logo.png";
        let svg = "rust-logo.svg";
        let url = "https://www.rust-lang.org/static/images/rust-logo-blk.svg";
        let content = format!(
            "# Chapter 1\n\n\
            ![Rust Logo]({png})\n\n\
            ![Rust Logo remote]({url})\n\n\
            <img alt=\"Rust Logo in html\" src=\"{svg}\" />\n"
        );
        let tmp_dir = TempDir::new().unwrap();
        let destination = tmp_dir.path().join("mdbook-epub");
        let json = ctx_with_template(&content, "src", destination.as_path()).to_string();
        let ctx = RenderContext::from_json(json.as_bytes()).unwrap();

        let mut mock_client = MockContentRetriever::new();
        mock_client
            .expect_download()
            .times(0)
            .returning(|_| Ok(UpdatedAssetData::default()));
        // checks local path of assets
        let book_source = ctx
            .root
            .join(&ctx.config.book.src)
            .canonicalize()
            .unwrap_or_else(|e| {
                // Build the message only on failure.
                panic!(
                    "book source root is not found: {}: {e}",
                    ctx.config.book.src.display()
                )
            });
        let should_be_png = book_source.join(png);
        let should_be_svg = book_source.join(svg);
        let hashed_filename = utils::hash_link(&url.parse::<Url>().unwrap());
        let should_be_url = destination.as_path().join(hashed_filename);
        for should_be in [should_be_svg, should_be_png, should_be_url] {
            mock_client
                .expect_read()
                .times(1)
                .withf(move |path, _| path == should_be)
                .returning(|_, _| Ok(()));
        }

        let mut g = Generator::new_with_handler(&ctx, mock_client).unwrap();
        g.find_assets().unwrap();
        assert_eq!(g.assets.len(), 3);
        g.additional_assets().unwrap();
    }

    /// The asset link filter keeps local links and plain hyperlinks as they
    /// are, and rewrites a known remote image in raw HTML to its hashed file
    /// name.
    #[test]
    fn test_render_assets() {
        init_logging();
        let links = [
            "local.webp",
            "http://server/remote.svg",
            "http://server/link.png",
        ];
        let tmp_dir = TempDir::new().unwrap();
        let root = tmp_dir.path().join("mdbook-epub");
        let mut assets = HashMap::new();
        let original_link = links[0].to_string();
        assets.insert(
            original_link.clone(),
            Asset {
                original_link,
                location_on_disk: root.as_path().join("src").join(links[0]),
                filename: PathBuf::from(links[0]),
                mimetype: "image/webp".parse::<mime::Mime>().unwrap(),
                source: AssetKind::Local(PathBuf::from(links[0])),
            },
        );
        let url = Url::parse(links[1]).unwrap();
        let hashed_filename = utils::hash_link(&url);
        let hashed_path = Path::new("cache").join(&hashed_filename);
        let original_link = links[1].to_string();
        assets.insert(
            original_link.clone(),
            Asset {
                original_link,
                location_on_disk: root.as_path().join("book").join(&hashed_path),
                filename: hashed_path,
                mimetype: "image/svg+xml".parse::<mime::Mime>().unwrap(),
                source: AssetKind::Remote(url),
            },
        );
        let markdown_str = format!(
            "Chapter 1\n\
            =====\n\n\
            * [link]({})\n\
            * ![Local Image]({})\n\
            * <img alt=\"Remote Image\" src=\"{}\" >\n",
            links[2], links[0], links[1]
        );

        /// Retriever stub that never touches the network.
        struct TestHandler;
        impl ContentRetriever for TestHandler {
            fn download(&self, asset: &Asset) -> Result<UpdatedAssetData, Error> {
                Ok(UpdatedAssetData {
                    mimetype: asset.mimetype.clone(),
                    filename: PathBuf::from("78221e8d16c52ea3.svg"),
                    location_on_disk: asset.location_on_disk.clone(),
                })
            }
            fn retrieve(&self, _url: &str) -> Result<RetrievedContent, Error> {
                trace!("retrieve by {_url}");
                let content = "Downloaded content".as_bytes();
                Ok(RetrievedContent::new(
                    Box::new(Cursor::new(content)),
                    "image/svg+xml".to_string(),
                    "svg".to_string(),
                    Some(content.len() as u64),
                ))
            }
        }
        let test_content_retriever = TestHandler {};

        let mut filter = AssetRemoteLinkFilter::new(&mut assets, 0, &test_content_retriever);
        let parser = utils::create_new_pull_down_parser(&markdown_str);
        let events = parser.map(|ev| filter.apply(ev));
        trace!("Events = {:?}", events);
        let mut html_buf = String::new();
        html::push_html(&mut html_buf, events);
        trace!("html_buf = {:?}", html_buf);

        assert_eq!(
            html_buf,
            format!(
                "<h1>Chapter 1</h1>\n\
                <ul>\n\
                <li><a href=\"{}\">link</a></li>\n\
                <li><img src=\"{}\" alt=\"Local Image\" /></li>\n\
                <li><img alt=\"Remote Image\" src=\"{}\" >\n\
                </li>\n\
                </ul>\n",
                links[2], links[0], hashed_filename
            )
        );
    }

    /// A remote image used by several chapters is registered once and is
    /// linked with a `../` prefix from chapters inside a sub-directory.
    ///
    /// NOTE(review): this test goes through `Generator::new`, i.e. the real
    /// `ResourceHandler`, with a remote URL; it presumably needs network
    /// access. Confirm before running it in an offline CI.
    #[test]
    fn test_render_remote_assets_in_sub_chapter() {
        init_logging();
        let link = "https://upload.wikimedia.org/wikipedia/commons/4/4e/Open_Source_Initiative_keyhole.svg";
        let tmp_dir = TempDir::new().unwrap();
        let dest_dir = tmp_dir.path().join("mdbook-epub");
        let ch1_1 = json!({
            "Chapter": {
                "name": "subchapter",
                "content": format!("# Subchapter\n\n![Image]({link})"),
                "number": [1,1],
                "sub_items": [],
                "path": "chapter_1/subchapter.md",
                "parent_names": ["Chapter 1"]
            }
        });
        let ch1 = json!({
            "Chapter": {
                "name": "Chapter 1",
                "content": format!("# Chapter 1\n\n![Image]({link})"),
                "number": [1],
                "sub_items": [ch1_1],
                "path": "chapter_1/index.md",
                "parent_names": []
            }
        });
        let ch2 = json!({
            "Chapter": {
                "name": "Chapter 2",
                "content": format!("# Chapter 2\n\n![Image]({link})"),
                "number": [2],
                "sub_items": [],
                "path": "chapter_2.md",
                "parent_names": []
            }
        });
        let mut json = ctx_with_template("", "src", dest_dir.as_path());
        let chvalue = json["book"]["sections"].as_array_mut().unwrap();
        chvalue.clear();
        chvalue.push(ch1);
        chvalue.push(ch2);

        let ctx = RenderContext::from_json(json.to_string().as_bytes()).unwrap();
        let mut g = Generator::new(&ctx).unwrap();
        g.find_assets().unwrap();
        assert_eq!(g.assets.len(), 1);

        // Expected start of the rendered chapter for a heading and link prefix.
        let pat = |heading, prefix| {
            format!("<h1>{heading}</h1>\n<p><img src=\"{prefix}e3825a3756080f55.svg\"")
        };
        if let BookItem::Chapter(ref ch) = ctx.book.sections[0] {
            let rendered: String = g.render_chapter(ch).unwrap();
            debug!("1. rendered ===\n{}", &rendered);
            assert!(rendered.contains(&pat("Chapter 1", "../")));

            if let BookItem::Chapter(ref sub_ch) = ch.sub_items[0] {
                let sub_rendered = g.render_chapter(sub_ch).unwrap();
                debug!("2. rendered ===\n{}", &sub_rendered);
                assert!(sub_rendered.contains(&pat("Subchapter", "../")));
            } else {
                panic!("first sub-item of 'Chapter 1' should be a chapter");
            }
        } else {
            panic!("first section should be a chapter");
        }
        if let BookItem::Chapter(ref ch) = ctx.book.sections[1] {
            let rendered: String = g.render_chapter(ch).unwrap();
            assert!(rendered.contains(&pat("Chapter 2", "")));
        } else {
            panic!("second section should be a chapter");
        }
    }

    /// Asset discovery must fail when the configured `src` directory does not
    /// exist.
    ///
    /// NOTE(review): `#[should_panic]` is also satisfied by a panic in
    /// `Generator::new(..).unwrap()`; confirm the panic really originates
    /// from `find_assets`.
    #[test]
    #[should_panic]
    fn test_find_assets_with_wrong_src_dir() {
        init_logging();
        let tmp_dir = TempDir::new().unwrap();
        let json = ctx_with_template(
            "# Chapter 1\n\n",
            "nosuchsrc",
            tmp_dir.path().join("mdbook-epub").as_path(),
        )
        .to_string();
        let ctx = RenderContext::from_json(json.as_bytes()).unwrap();
        let mut g = Generator::new(&ctx).unwrap();
        g.find_assets().unwrap();
    }

    /// Build a minimal serialized render context: one numbered chapter with
    /// the given markdown `content`, book sources in `source` (relative to the
    /// fixed root `tests/long_book_example`) and output going to
    /// `destination`.
    fn ctx_with_template(content: &str, source: &str, destination: &Path) -> serde_json::Value {
        json!({
            "version": mdbook::MDBOOK_VERSION,
            "root": "tests/long_book_example",
            "book": {"sections": [{
                "Chapter": {
                    "name": "Chapter 1",
                    "content": content,
                    "number": [1],
                    "sub_items": [],
                    "path": "chapter_1.md",
                    "parent_names": []
                }}], "__non_exhaustive": null},
            "config": {
                "book": {"authors": [], "language": "en", "multilingual": false,
                    "src": source, "title": "DummyBook"},
                "output": {"epub": {"curly-quotes": true}}},
            "destination": destination
        })
    }
}