parse_hyperlinks/parser/
html_img.rs

1//! This module implements parsers for HTML image elements.
2#![allow(dead_code)]
3
4use crate::parser::Link;
5use crate::parser::html::attribute_list;
6use crate::parser::html::tag_a_opening as href_tag_a_opening;
7use html_escape::decode_html_entities;
8use nom::Parser;
9use nom::branch::alt;
10use nom::bytes::complete::is_not;
11use nom::bytes::complete::tag;
12use nom::error::Error;
13use nom::error::ErrorKind;
14use std::borrow::Cow;
15
16/// Wrapper around `html_img()` that packs the result in
17/// `Link::Image`.
18pub fn html_img_link(i: &'_ str) -> nom::IResult<&'_ str, Link<'_>> {
19    let (i, (alt, src)) = html_img(i)?;
20    Ok((i, Link::Image(alt, src)))
21}
22
23/// Parse an HTML _image_.
24///
25/// It returns either `Ok((i, (img_alt, img_src)))` or some error.
26///
27/// The parser expects to start at the link start (`<`) to succeed.
28/// ```
29/// use parse_hyperlinks;
30/// use parse_hyperlinks::parser::html_img::html_img;
31/// use std::borrow::Cow;
32///
33/// assert_eq!(
34///   html_img(r#"<img src="/images/my&amp;dog.png" alt="my Dog" width="500">abc"#),
35///   Ok(("abc", (Cow::from("my Dog"), Cow::from("/images/my&dog.png"))))
36/// );
37/// ```
38pub fn html_img(i: &'_ str) -> nom::IResult<&'_ str, (Cow<'_, str>, Cow<'_, str>)> {
39    tag_img(i)
40}
41
42/// Parses a `<img ...>` tag and returns
43/// either `Ok((i, (img_alt, img_src)))` or some error.
44#[inline]
45fn tag_img(i: &'_ str) -> nom::IResult<&'_ str, (Cow<'_, str>, Cow<'_, str>)> {
46    nom::sequence::delimited(
47        // HTML is case insensitive. XHTML, that is being XML is case sensitive.
48        // Here we deal with HTML.
49        alt((tag("<img "), tag("<IMG "))),
50        nom::combinator::map_parser(is_not(">"), parse_attributes),
51        tag(">"),
52    )
53    .parse(i)
54}
55
56/// Wrapper around `html_img()` that packs the result in
57/// `Link::Image`.
58pub fn html_img2dest_link(i: &'_ str) -> nom::IResult<&'_ str, Link<'_>> {
59    let (i, (text1, img_alt, img_src, text2, dest, title)) = html_img2dest(i)?;
60    Ok((
61        i,
62        Link::Image2Dest(text1, img_alt, img_src, text2, dest, title),
63    ))
64}
65
66/// Parse an HTML inline hyperlink with embedded image.
67///
68/// It returns either
69// `Ok((i, (text1, img_alt, img_src, text2, dest, title)))` or some error.
70///
71///
72/// The parser expects to start at the link start (`<`) to succeed.
73/// ```
74/// use parse_hyperlinks::parser::Link;
75/// use parse_hyperlinks::parser::html_img::html_img2dest;
76/// use std::borrow::Cow;
77///
78/// assert_eq!(
79///   html_img2dest("<a href=\"my doc.html\" title=\"title\">\
80///                    before<img src=\"dog.png\" alt=\"alt dog\"/>after\
81///                    </a>abc"),
82///   Ok(("abc",
83///    (Cow::from("before"), Cow::from("alt dog"), Cow::from("dog.png"),
84///     Cow::from("after"), Cow::from("my doc.html"), Cow::from("title"),
85/// ))));
86/// ```
87#[allow(clippy::type_complexity)]
88pub fn html_img2dest(
89    i: &'_ str,
90) -> nom::IResult<
91    &'_ str,
92    (
93        Cow<'_, str>,
94        Cow<'_, str>,
95        Cow<'_, str>,
96        Cow<'_, str>,
97        Cow<'_, str>,
98        Cow<'_, str>,
99    ),
100> {
101    let (i, ((dest, title), text)) = nom::sequence::terminated(
102        nom::sequence::pair(
103            href_tag_a_opening,
104            alt((
105                nom::bytes::complete::take_until("</a>"),
106                nom::bytes::complete::take_until("</A>"),
107            )),
108        ),
109        // HTML is case insensitive. XHTML, that is being XML is case sensitive.
110        // Here we deal with HTML.
111        alt((tag("</a>"), tag("</A>"))),
112    )
113    .parse(i)?;
114
115    let (_, (text1, (img_alt, img_src), text2)) = (
116        nom::bytes::complete::take_until("<img"),
117        html_img,
118        nom::combinator::rest,
119    )
120        .parse(text)?;
121
122    let text1 = decode_html_entities(text1);
123    let text2 = decode_html_entities(text2);
124
125    Ok((i, (text1, img_alt, img_src, text2, dest, title)))
126}
127
128/// Extracts the `src` and `alt` attributes and returns
129/// `Ok((img_alt, img_src))`. `img_alt` can be empty,
130/// `img_src` not.
131fn parse_attributes(i: &'_ str) -> nom::IResult<&'_ str, (Cow<'_, str>, Cow<'_, str>)> {
132    let (i, attributes) = attribute_list(i)?;
133    let mut src = Cow::Borrowed("");
134    let mut alt = Cow::Borrowed("");
135
136    for (name, value) in attributes {
137        if name == "src" {
138            // Make sure `src` is empty, it can appear only
139            // once.
140            if !(*src).is_empty() {
141                return Err(nom::Err::Error(Error::new(name, ErrorKind::ManyMN)));
142            }
143            src = value;
144        } else if name == "alt" {
145            // Make sure `title` is empty, it can appear only
146            // once.
147            if !(*alt).is_empty() {
148                return Err(nom::Err::Error(Error::new(name, ErrorKind::ManyMN)));
149            }
150            alt = value;
151        }
152    }
153
154    // Assure that `href` is not empty.
155    if (*src).is_empty() {
156        return Err(nom::Err::Error(Error::new(i, ErrorKind::Eof)));
157    };
158
159    Ok((i, (alt, src)))
160}
161
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parser::html::attribute_list;

    /// `tag_img()` on plain, upper-case and self-closing image tags.
    #[test]
    fn test_tag_img() {
        // All four spellings must yield the same alt text and decoded source.
        let want = (
            "abc",
            (
                Cow::from("My dog"),
                Cow::from("http://getreu.net/my&dog.png"),
            ),
        );
        let inputs = [
            r#"<img src="http://getreu.net/my&amp;dog.png" alt="My dog">abc"#,
            r#"<IMG src="http://getreu.net/my&amp;dog.png" alt="My dog">abc"#,
            r#"<IMG src="http://getreu.net/my&amp;dog.png" alt="My dog"/>abc"#,
            r#"<IMG src="http://getreu.net/my&amp;dog.png" alt="My dog" />abc"#,
        ];
        for input in inputs {
            assert_eq!(tag_img(input).unwrap(), want);
        }

        // Percent-encoded sources are passed through as they are.
        let want = (
            "abc",
            (Cow::from("Some picture"), Cow::from("t%20m%20p.jpg")),
        );
        let got = tag_img(r#"<img src="t%20m%20p.jpg" alt="Some picture" />abc"#).unwrap();
        assert_eq!(got, want);
    }

    /// `parse_attributes()`: the success case, duplicated attributes and a
    /// missing `src`.
    #[test]
    fn test_parse_attributes() {
        // `src` and `alt` are picked out between unrelated tokens.
        let want = (
            "",
            (
                Cow::from("My dog"),
                Cow::from("http://getreu.net/my&dog.png"),
            ),
        );
        let got =
            parse_attributes(r#"abc src="http://getreu.net/my&amp;dog.png" abc alt="My dog" abc"#);
        assert_eq!(got, Ok(want));

        // A second `src` is an error that points at the attribute name.
        let got = parse_attributes(r#" src="http://getreu.net" src="http://blog.getreu.net" "#);
        assert_eq!(
            got,
            Err(nom::Err::Error(nom::error::Error::new(
                "src",
                nom::error::ErrorKind::ManyMN
            )))
        );

        // The same holds for a second `alt`.
        let got = parse_attributes(r#" src="http://getreu.net" alt="a" alt="b" "#);
        assert_eq!(
            got,
            Err(nom::Err::Error(nom::error::Error::new(
                "alt",
                nom::error::ErrorKind::ManyMN
            )))
        );

        // Without any `src` the parser fails with `Eof` on the remaining input.
        let got = parse_attributes(r#" title="title" "#);
        assert_eq!(
            got,
            Err(nom::Err::Error(nom::error::Error::new(
                "",
                nom::error::ErrorKind::Eof
            )))
        );
    }

    /// `attribute_list()` yields empty name/value pairs for tokens that are
    /// not attributes.
    #[test]
    fn test_attribute_list() {
        let empty_pair = || ("", Cow::from(""));
        let want_attributes = vec![
            empty_pair(),
            ("src", Cow::from("http://getreu.net/my&dog.png")),
            empty_pair(),
            ("alt", Cow::from("My dog")),
            empty_pair(),
        ];

        let (remaining, attributes) =
            attribute_list(r#"abc src="http://getreu.net/my&amp;dog.png" abc alt="My dog" abc"#)
                .unwrap();
        assert_eq!((remaining, attributes), ("", want_attributes));
    }
}