html2md/
images.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
use std::sync::Arc;

use super::common::get_tag_attr;
use super::StructuredPrinter;
use super::TagHandler;
use markup5ever_rcdom::Handle;
use percent_encoding::{utf8_percent_encode, AsciiSet, CONTROLS};
use url::Url;

// Set of bytes that get percent-encoded when an image URL is rewritten:
// the ASCII control characters plus space, `"`, `<`, `>` and backtick.
// Used by `ImgHandler::handle` for `src` values that contain a space.
const FRAGMENT: &AsciiSet = &CONTROLS.add(b' ').add(b'"').add(b'<').add(b'>').add(b'`');

/// Handler for `<img>` tag. Depending on circumstances can produce both
/// inline HTML-formatted image and Markdown native one
///
/// Inline HTML is emitted only in CommonMark mode and only when the tag
/// carries a `height`, `width` or `align` attribute; every other image is
/// written with the native `![alt](src "title")` syntax.
#[derive(Default)]
pub struct ImgHandler {
    /// Set while handling a tag whose inline `style` requests block display;
    /// when set, a newline is inserted before and after the image.
    block_mode: bool,
    /// Whether inline HTML may be emitted to preserve `height`/`width`/`align`.
    commonmark: bool,
    /// Used to make absolute urls.
    /// Only `src` values that start with `/` are joined onto it.
    url: Option<Arc<Url>>,
}

impl ImgHandler {
    /// Creates a handler for `<img>` tags.
    ///
    /// * `commonmark` - when `true`, images carrying `height`, `width` or
    ///   `align` attributes are emitted as inline HTML instead of native
    ///   Markdown so those attributes survive.
    /// * `url` - optional base URL; when present, `src` values starting with
    ///   `/` are resolved against it. The `Arc` is cloned, so only the
    ///   reference count is bumped and the underlying `Url` is shared.
    ///
    /// `block_mode` starts out `false` (its `Default`).
    pub fn new(commonmark: bool, url: &Option<std::sync::Arc<Url>>) -> Self {
        Self {
            commonmark,
            // Cloning the `Option` clones the inner `Arc` handle, if any.
            url: url.clone(),
            ..Default::default()
        }
    }
}

impl TagHandler for ImgHandler {
    /// Renders an `<img>` tag into `printer`.
    ///
    /// Two output forms exist:
    ///
    /// * In CommonMark mode, when the tag has a `height`, `width` or `align`
    ///   attribute, an inline `<img ... />` element is written so those
    ///   attributes are preserved. Attribute values are HTML-escaped.
    /// * Otherwise the native `![alt](src "title")` form is written. A `src`
    ///   containing a space is percent-encoded, and a `src` starting with `/`
    ///   is resolved against the configured base URL when one is set.
    ///
    /// If the tag's inline `style` declares `display: block` (whitespace and
    /// ASCII case are ignored), the handler enters block mode and inserts a
    /// newline before the image; `after_handle` inserts the trailing one.
    fn handle(&mut self, tag: &Handle, printer: &mut StructuredPrinter) {
        // Escapes the characters that could terminate or corrupt a
        // double-quoted HTML attribute value. The values come from untrusted
        // input HTML, so an unescaped `"` would allow attribute injection.
        fn escape_attr(value: &str) -> String {
            let mut escaped = String::with_capacity(value.len());
            for ch in value.chars() {
                match ch {
                    '&' => escaped.push_str("&amp;"),
                    '"' => escaped.push_str("&quot;"),
                    '<' => escaped.push_str("&lt;"),
                    '>' => escaped.push_str("&gt;"),
                    other => escaped.push(other),
                }
            }
            escaped
        }

        // hack: detect if the image has associated style and has display in block mode
        if let Some(style) = get_tag_attr(tag, "style") {
            // Drop whitespace and fold case so that `display:block`,
            // `display: block` and `DISPLAY : BLOCK` are all recognised.
            let normalized = style
                .chars()
                .filter(|ch| !ch.is_whitespace())
                .collect::<String>()
                .to_ascii_lowercase();

            if normalized.contains("display:block") {
                self.block_mode = true;
            }
        }

        if self.block_mode {
            // make image on new paragraph
            printer.insert_newline();
        }

        // try to extract attrs
        let src = get_tag_attr(tag, "src");

        let alt = get_tag_attr(tag, "alt");
        let title = get_tag_attr(tag, "title");
        let height = get_tag_attr(tag, "height");
        let width = get_tag_attr(tag, "width");
        let align = get_tag_attr(tag, "align");

        if self.commonmark && (height.is_some() || width.is_some() || align.is_some()) {
            // need to handle it as inline html to preserve attributes we support
            let attributes = [
                ("alt", alt),
                ("src", src),
                ("title", title),
                ("height", height),
                ("width", width),
                ("align", align),
            ];

            let mut html = String::from("<img");
            for (name, value) in attributes {
                // Attributes missing from the tag are left out entirely.
                if let Some(value) = value {
                    html.push(' ');
                    html.push_str(name);
                    html.push_str("=\"");
                    html.push_str(&escape_attr(&value));
                    html.push('"');
                }
            }
            html.push_str(" />");

            printer.append_str(&html);
        } else {
            // need to escape URL if it contains spaces
            // don't have any geometry-controlling attrs, post markdown natively
            let mut img_url = src.unwrap_or_default();

            if img_url.contains(' ') {
                img_url = utf8_percent_encode(&img_url, FRAGMENT).to_string();
            }

            if img_url.starts_with('/') {
                if let Some(base) = &self.url {
                    // A failed join keeps the original (relative) URL.
                    if let Ok(absolute) = base.join(&img_url) {
                        img_url = absolute.to_string();
                    }
                }
            }

            printer.append_str(&format!(
                "![{}]({}{})",
                alt.unwrap_or_default(),
                &img_url,
                title
                    .map(|value| format!(" \"{}\"", value))
                    .unwrap_or_default()
            ));
        }
    }

    /// Closes the paragraph opened in `handle` by inserting a newline when
    /// the image was rendered in block mode.
    fn after_handle(&mut self, printer: &mut StructuredPrinter) {
        if self.block_mode {
            printer.insert_newline();
        }
    }
}