parse_hyperlinks_extras/
iterator_html.rs

//! Module providing iterators over the hyperlinks found in the input text.
//! Only HTML is parsed here; no other markup languages are supported.
3#![allow(clippy::type_complexity)]
4
5use crate::parser::parse_html::take_img;
6use crate::parser::parse_html::take_link;
7use crate::parser::parse_html::take_text2dest;
8use parse_hyperlinks::parser::Link;
9use std::borrow::Cow;
10
/// Iterator over the inline hyperlinks in the HTML formatted `input` text.
///
/// This struct holds the iterator's state as an advancing pointer into the
/// `input` text. The iterator's `next()` method returns a tuple with 2 tuples
/// inside: `Some((input_split, html_hyperlink_element))`.
///
/// Each tuple has the following parts:
/// * `input_split = (skipped_characters, consumed_characters, remaining_characters)`
/// * `html_hyperlink_element = (link_text, link_destination, link_title)`
///
/// # Input split
///
/// ```
/// use parse_hyperlinks_extras::iterator_html::HtmlLink;
/// use std::borrow::Cow;
///
/// let i = "abc<a href=\"dest1\" title=\"title1\">text1</a>abc\n\
///          abc<a href=\"dest2\" title=\"title2\">text2</a>xyz";
///
/// let mut iter = HtmlLink::new(i);
/// assert_eq!(iter.next().unwrap().0,
///            ("abc",
///             "<a href=\"dest1\" title=\"title1\">text1</a>",
///             "abc\nabc<a href=\"dest2\" title=\"title2\">text2</a>xyz")
///           );
/// assert_eq!(iter.next().unwrap().0,
///            ("abc\nabc",
///             "<a href=\"dest2\" title=\"title2\">text2</a>",
///             "xyz")
///           );
/// assert_eq!(iter.next(), None);
/// ```
///
/// # Link content
///
/// ## HTML
///
/// ```
/// use parse_hyperlinks_extras::iterator_html::HtmlLink;
/// use std::borrow::Cow;
///
/// let i = "abc<a href=\"dest1\" title=\"title1\">text1</a>abc\
///          abc<a href=\"dest2\" title=\"title2\">text2</a>abc";
///
///
/// let mut iter = HtmlLink::new(i);
/// assert_eq!(iter.next().unwrap().1, (Cow::from("text1"), Cow::from("dest1"), Cow::from("title1")));
/// assert_eq!(iter.next().unwrap().1, (Cow::from("text2"), Cow::from("dest2"), Cow::from("title2")));
/// assert_eq!(iter.next(), None);
/// ```
// `Clone` and `Eq` are derived because the whole state is one shared string
// slice: copying the iterator is cheap and lets callers keep a checkpoint.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct HtmlLink<'a> {
    /// The remaining text input.
    input: &'a str,
}

/// Constructor for the `HtmlLink` struct.
impl<'a> HtmlLink<'a> {
    /// Constructor for the iterator. `input` is the text with inline
    /// hyperlinks to be extracted.
    #[inline]
    pub fn new(input: &'a str) -> Self {
        Self { input }
    }
}
73
74/// Iterator over the HTML inline images in the `input`-text.
75/// The iterator's `next()` method returns a tuple with 2 tuples inside:
76/// * `Some(((input_split)(link_content)))`
77///
78/// Each tuple has the following parts:
79/// * `input_split = (skipped_characters, consumed_characters, remaining_characters)`
80/// * `link_content = (link_text, link_destination, link_title)`
81///
82impl<'a> Iterator for HtmlLink<'a> {
83    type Item = (
84        (&'a str, &'a str, &'a str),
85        (Cow<'a, str>, Cow<'a, str>, Cow<'a, str>),
86    );
87    fn next(&mut self) -> Option<Self::Item> {
88        let mut output = None;
89
90        if let Ok((remaining_input, (skipped, (link_text, link_dest, link_title)))) =
91            take_text2dest(self.input)
92        {
93            let consumed = &self.input[skipped.len()..self.input.len() - remaining_input.len()];
94            // Assigning output.
95            output = Some((
96                (skipped, consumed, remaining_input),
97                (link_text, link_dest, link_title),
98            ));
99            debug_assert_eq!(self.input, {
100                let mut s = "".to_string();
101                s.push_str(skipped);
102                s.push_str(consumed);
103                s.push_str(remaining_input);
104                s
105            });
106            self.input = remaining_input;
107        };
108        output
109    }
110}
111
/// Iterator over the inline images in the HTML formatted `input` text.
///
/// This struct holds the iterator's state, as an advancing pointer into the
/// `input` text. The iterator's `next()` method returns a tuple with 2 tuples
/// inside: `Some((input_split, html_image_element))`.
///
/// Each tuple has the following parts:
/// * `input_split = (skipped_characters, consumed_characters, remaining_characters)`
/// * `html_image_element = (img_alt, img_src)`
///
/// # Input split
///
/// ```
/// use parse_hyperlinks_extras::iterator_html::HtmlInlineImage;
/// use std::borrow::Cow;
///
/// let i = r#"abc<img src="dest1" alt="text1">efg<img src="dest2" alt="text2">hij"#;
///
/// let mut iter = HtmlInlineImage::new(i);
/// assert_eq!(iter.next().unwrap().0, ("abc", r#"<img src="dest1" alt="text1">"#,
///       r#"efg<img src="dest2" alt="text2">hij"#));
/// assert_eq!(iter.next().unwrap().0, ("efg", r#"<img src="dest2" alt="text2">"#,
///       "hij"));
/// assert_eq!(iter.next(), None);
/// ```
///
/// # Link content
///
/// ## HTML
///
/// ```
/// use parse_hyperlinks_extras::iterator_html::HtmlInlineImage;
/// use std::borrow::Cow;
///
/// let i = r#"abc<img src="dest1" alt="text1">abc
/// abc<img src="dest2" alt="text2">abc
/// "#;
///
/// let mut iter = HtmlInlineImage::new(i);
/// assert_eq!(iter.next().unwrap().1, (Cow::from("text1"), Cow::from("dest1")));
/// assert_eq!(iter.next().unwrap().1, (Cow::from("text2"), Cow::from("dest2")));
/// assert_eq!(iter.next(), None);
/// ```
// `Clone` and `Eq` are derived because the whole state is one shared string
// slice: copying the iterator is cheap and lets callers keep a checkpoint.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct HtmlInlineImage<'a> {
    /// The remaining text input.
    input: &'a str,
}

/// Constructor for the `HtmlInlineImage` struct.
impl<'a> HtmlInlineImage<'a> {
    /// Constructor for the iterator. `input` is the text with inline images to be
    /// extracted.
    #[inline]
    pub fn new(input: &'a str) -> Self {
        Self { input }
    }
}
167
168/// Iterator over the HTML inline images in the `input`-text.
169/// The iterator's `next()` method returns a tuple with 2 tuples inside:
170/// * `Some(((input_split)(link_content)))`
171///
172/// Each tuple has the following parts:
173/// * `input_split = (skipped_characters, consumed_characters, remaining_characters)`
174/// * `link_content = (image_alt, image_src)`
175///
176impl<'a> Iterator for HtmlInlineImage<'a> {
177    type Item = ((&'a str, &'a str, &'a str), (Cow<'a, str>, Cow<'a, str>));
178    fn next(&mut self) -> Option<Self::Item> {
179        let mut output = None;
180
181        if let Ok((remaining_input, (skipped, (alt, src)))) = take_img(self.input) {
182            let consumed = &self.input[skipped.len()..self.input.len() - remaining_input.len()];
183            // Assigning output.
184            output = Some(((skipped, consumed, remaining_input), (alt, src)));
185            debug_assert_eq!(self.input, {
186                let mut s = "".to_string();
187                s.push_str(skipped);
188                s.push_str(consumed);
189                s.push_str(remaining_input);
190                s
191            });
192            self.input = remaining_input;
193        };
194        output
195    }
196}
197
/// The state of the iterator over the hyperlinks and inline images in the
/// HTML formatted `input` text.
///
/// The only state kept is the part of the input that still has to be
/// searched.
// `Clone` and `Eq` are derived because the whole state is one shared string
// slice: copying the iterator is cheap and lets callers keep a checkpoint.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct HtmlLinkInlineImage<'a> {
    /// The remaining text input.
    input: &'a str,
}

/// Constructor for the `HtmlLinkInlineImage` struct.
impl<'a> HtmlLinkInlineImage<'a> {
    /// Constructor for the iterator. `input` is the text with hyperlinks and
    /// inline images to be extracted.
    #[inline]
    pub fn new(input: &'a str) -> Self {
        Self { input }
    }
}
214
215/// Iterator over the hyperlinks and inline images in the HTML formatted `input` text.
216/// This struct holds the iterator's state, as an advancing pointer into the `input` text.  
217/// The iterator's `next()` method returns a tuple with a tuple inside:
218/// * `Some(((input_split), Link))`
219///
220/// The first tuple has the following parts:
221/// * `input_split = (skipped_characters, consumed_characters, remaining_characters)`
222/// * `Link` is of type `parse_hyperlinks::parser::Link` and can be one of
223///   the variants `Link::Text2Dest`, `Link::Image` or `Link::Imgage2Dest`.
224///
225/// ```
226/// use parse_hyperlinks_extras::iterator_html::HtmlLinkInlineImage;
227/// use std::borrow::Cow;
228///
229/// let i = r#"abc<img src="dest1" alt="text1">abc
230/// abc<a href="dest2" title="title2">text2</a>abc"#;
231///
232/// let mut iter = HtmlLinkInlineImage::new(i);
233/// assert_eq!(iter.next().unwrap().0, ("abc",
234///     r#"<img src="dest1" alt="text1">"#,
235///     "abc\nabc<a href=\"dest2\" title=\"title2\">text2</a>abc"
236///     ));
237/// assert_eq!(iter.next().unwrap().0, ("abc\nabc",
238///     "<a href=\"dest2\" title=\"title2\">text2</a>",
239///     "abc"
240///     ));
241/// assert_eq!(iter.next(), None);
242/// ```
243/// # Link content
244/// ## HTML
245///
246/// ```
247/// use parse_hyperlinks_extras::iterator_html::HtmlLinkInlineImage;
248/// use parse_hyperlinks::parser::Link;
249/// use std::borrow::Cow;
250///
251/// let i = r#"abc<img src="dest1" alt="text1">abc
252/// abc<a href="dest2" title="title2">text2</a>abc
253/// abc<a href="dest3" title="title3">cde<img alt="alt3" src="src3"/>fgh</a>abc
254/// "#;
255///
256/// let mut iter = HtmlLinkInlineImage::new(i);
257/// assert_eq!(iter.next().unwrap().1,
258///            Link::Image(Cow::from("text1"), Cow::from("dest1")));
259/// assert_eq!(iter.next().unwrap().1,
260///            Link::Text2Dest(Cow::from("text2"),
261///                            Cow::from("dest2"),
262///                            Cow::from("title2")));
263/// assert_eq!(iter.next().unwrap().1,
264///  Link::Image2Dest(Cow::from("cde"), Cow::from("alt3"), Cow::from("src3"),
265///                 Cow::from("fgh"), Cow::from("dest3"), Cow::from("title3")));
266/// assert_eq!(iter.next(), None);
267/// ```
268///
269impl<'a> Iterator for HtmlLinkInlineImage<'a> {
270    type Item = ((&'a str, &'a str, &'a str), Link<'a>);
271    fn next(&mut self) -> Option<Self::Item> {
272        let mut output = None;
273
274        if let Ok((remaining_input, (skipped, link))) = take_link(self.input) {
275            let consumed = &self.input[skipped.len()..self.input.len() - remaining_input.len()];
276            // Assigning output.
277            output = Some(((skipped, consumed, remaining_input), link));
278            debug_assert_eq!(self.input, {
279                let mut s = "".to_string();
280                s.push_str(skipped);
281                s.push_str(consumed);
282                s.push_str(remaining_input);
283                s
284            });
285            self.input = remaining_input;
286        };
287        output
288    }
289}