parse_hyperlinks_extras/iterator_html.rs
1//! Module providing iterators over the hyperlinks found in the input text.
//! Only HTML is parsed here; no other markup languages are supported.
3#![allow(clippy::type_complexity)]
4
5use crate::parser::parse_html::take_img;
6use crate::parser::parse_html::take_link;
7use crate::parser::parse_html::take_text2dest;
8use parse_hyperlinks::parser::Link;
9use std::borrow::Cow;
10
/// Iterator over the inline hyperlinks (`<a href=…>…</a>`) found in the HTML
/// formatted `input` text.
///
/// The only state kept is `input`, the part of the text that has not been
/// searched yet. Calling `next()` yields `Some((input_split, html_hyperlink_element))`,
/// a tuple holding two tuples:
///
/// * `input_split = (skipped_characters, consumed_characters, remaining_characters)`
/// * `html_hyperlink_element = (link_text, link_destination, link_title)`
///
/// # Input split
///
/// ```
/// use parse_hyperlinks_extras::iterator_html::HtmlLink;
/// use std::borrow::Cow;
///
/// let i = "abc<a href=\"dest1\" title=\"title1\">text1</a>abc\n\
///          abc<a href=\"dest2\" title=\"title2\">text2</a>xyz";
///
/// let mut iter = HtmlLink::new(i);
/// assert_eq!(iter.next().unwrap().0,
///            ("abc",
///             "<a href=\"dest1\" title=\"title1\">text1</a>",
///             "abc\nabc<a href=\"dest2\" title=\"title2\">text2</a>xyz")
///           );
/// assert_eq!(iter.next().unwrap().0,
///            ("abc\nabc",
///             "<a href=\"dest2\" title=\"title2\">text2</a>",
///             "xyz")
///           );
/// assert_eq!(iter.next(), None);
/// ```
/// # Link content
/// ## HTML
///
/// ```
/// use parse_hyperlinks_extras::iterator_html::HtmlLink;
/// use std::borrow::Cow;
///
/// let i = "abc<a href=\"dest1\" title=\"title1\">text1</a>abc\
///          abc<a href=\"dest2\" title=\"title2\">text2</a>abc";
///
///
/// let mut iter = HtmlLink::new(i);
/// assert_eq!(iter.next().unwrap().1, (Cow::from("text1"), Cow::from("dest1"), Cow::from("title1")));
/// assert_eq!(iter.next().unwrap().1, (Cow::from("text2"), Cow::from("dest2"), Cow::from("title2")));
/// assert_eq!(iter.next(), None);
/// ```
#[derive(Debug, PartialEq)]
pub struct HtmlLink<'a> {
    /// The part of the input text that has not been searched yet.
    input: &'a str,
}
63
64/// Constructor for the `HtmlLink` struct.
65impl<'a> HtmlLink<'a> {
66 /// Constructor for the iterator. `input` is the text with inline images to be
67 /// extracted.
68 #[inline]
69 pub fn new(input: &'a str) -> Self {
70 Self { input }
71 }
72}
73
74/// Iterator over the HTML inline images in the `input`-text.
75/// The iterator's `next()` method returns a tuple with 2 tuples inside:
76/// * `Some(((input_split)(link_content)))`
77///
78/// Each tuple has the following parts:
79/// * `input_split = (skipped_characters, consumed_characters, remaining_characters)`
80/// * `link_content = (link_text, link_destination, link_title)`
81///
82impl<'a> Iterator for HtmlLink<'a> {
83 type Item = (
84 (&'a str, &'a str, &'a str),
85 (Cow<'a, str>, Cow<'a, str>, Cow<'a, str>),
86 );
87 fn next(&mut self) -> Option<Self::Item> {
88 let mut output = None;
89
90 if let Ok((remaining_input, (skipped, (link_text, link_dest, link_title)))) =
91 take_text2dest(self.input)
92 {
93 let consumed = &self.input[skipped.len()..self.input.len() - remaining_input.len()];
94 // Assigning output.
95 output = Some((
96 (skipped, consumed, remaining_input),
97 (link_text, link_dest, link_title),
98 ));
99 debug_assert_eq!(self.input, {
100 let mut s = "".to_string();
101 s.push_str(skipped);
102 s.push_str(consumed);
103 s.push_str(remaining_input);
104 s
105 });
106 self.input = remaining_input;
107 };
108 output
109 }
110}
111
/// Iterator over the inline images (`<img …>`) found in the HTML formatted
/// `input` text.
///
/// The only state kept is `input`, the part of the text that has not been
/// searched yet. Calling `next()` yields `Some((input_split, html_image_element))`,
/// a tuple holding two tuples:
///
/// * `input_split = (skipped_characters, consumed_characters, remaining_characters)`
/// * `html_image_element = (img_alt, img_src)`
///
/// # Input split
///
/// ```
/// use parse_hyperlinks_extras::iterator_html::HtmlInlineImage;
/// use std::borrow::Cow;
///
/// let i = r#"abc<img src="dest1" alt="text1">efg<img src="dest2" alt="text2">hij"#;
///
/// let mut iter = HtmlInlineImage::new(i);
/// assert_eq!(iter.next().unwrap().0, ("abc", r#"<img src="dest1" alt="text1">"#,
///       r#"efg<img src="dest2" alt="text2">hij"#));
/// assert_eq!(iter.next().unwrap().0, ("efg", r#"<img src="dest2" alt="text2">"#,
///       "hij"));
/// assert_eq!(iter.next(), None);
/// ```
/// # Link content
/// ## HTML
///
/// ```
/// use parse_hyperlinks_extras::iterator_html::HtmlInlineImage;
/// use std::borrow::Cow;
///
/// let i = r#"abc<img src="dest1" alt="text1">abc
/// abc<img src="dest2" alt="text2">abc
/// "#;
///
/// let mut iter = HtmlInlineImage::new(i);
/// assert_eq!(iter.next().unwrap().1, (Cow::from("text1"), Cow::from("dest1")));
/// assert_eq!(iter.next().unwrap().1, (Cow::from("text2"), Cow::from("dest2")));
/// assert_eq!(iter.next(), None);
/// ```
#[derive(Debug, PartialEq)]
pub struct HtmlInlineImage<'a> {
    /// The part of the input text that has not been searched yet.
    input: &'a str,
}
157
158/// Constructor for the `HtmlLink` struct.
159impl<'a> HtmlInlineImage<'a> {
160 /// Constructor for the iterator. `input` is the text with inline images to be
161 /// extracted.
162 #[inline]
163 pub fn new(input: &'a str) -> Self {
164 Self { input }
165 }
166}
167
168/// Iterator over the HTML inline images in the `input`-text.
169/// The iterator's `next()` method returns a tuple with 2 tuples inside:
170/// * `Some(((input_split)(link_content)))`
171///
172/// Each tuple has the following parts:
173/// * `input_split = (skipped_characters, consumed_characters, remaining_characters)`
174/// * `link_content = (image_alt, image_src)`
175///
176impl<'a> Iterator for HtmlInlineImage<'a> {
177 type Item = ((&'a str, &'a str, &'a str), (Cow<'a, str>, Cow<'a, str>));
178 fn next(&mut self) -> Option<Self::Item> {
179 let mut output = None;
180
181 if let Ok((remaining_input, (skipped, (alt, src)))) = take_img(self.input) {
182 let consumed = &self.input[skipped.len()..self.input.len() - remaining_input.len()];
183 // Assigning output.
184 output = Some(((skipped, consumed, remaining_input), (alt, src)));
185 debug_assert_eq!(self.input, {
186 let mut s = "".to_string();
187 s.push_str(skipped);
188 s.push_str(consumed);
189 s.push_str(remaining_input);
190 s
191 });
192 self.input = remaining_input;
193 };
194 output
195 }
196}
197
/// State of the iterator over hyperlinks and inline images in HTML formatted
/// text: the part of the input that has not been searched yet.
#[derive(Debug, PartialEq)]
pub struct HtmlLinkInlineImage<'a> {
    /// The part of the input text that has not been searched yet.
    input: &'a str,
}
204
205/// Constructor for the `HtmlLinkInlineImage` struct.
206impl<'a> HtmlLinkInlineImage<'a> {
207 /// Constructor for the iterator. `input` is the text with hyperlinks and
208 /// inline images to be extracted.
209 #[inline]
210 pub fn new(input: &'a str) -> Self {
211 Self { input }
212 }
213}
214
215/// Iterator over the hyperlinks and inline images in the HTML formatted `input` text.
216/// This struct holds the iterator's state, as an advancing pointer into the `input` text.
217/// The iterator's `next()` method returns a tuple with a tuple inside:
218/// * `Some(((input_split), Link))`
219///
220/// The first tuple has the following parts:
221/// * `input_split = (skipped_characters, consumed_characters, remaining_characters)`
222/// * `Link` is of type `parse_hyperlinks::parser::Link` and can be one of
223/// the variants `Link::Text2Dest`, `Link::Image` or `Link::Imgage2Dest`.
224///
225/// ```
226/// use parse_hyperlinks_extras::iterator_html::HtmlLinkInlineImage;
227/// use std::borrow::Cow;
228///
229/// let i = r#"abc<img src="dest1" alt="text1">abc
230/// abc<a href="dest2" title="title2">text2</a>abc"#;
231///
232/// let mut iter = HtmlLinkInlineImage::new(i);
233/// assert_eq!(iter.next().unwrap().0, ("abc",
234/// r#"<img src="dest1" alt="text1">"#,
235/// "abc\nabc<a href=\"dest2\" title=\"title2\">text2</a>abc"
236/// ));
237/// assert_eq!(iter.next().unwrap().0, ("abc\nabc",
238/// "<a href=\"dest2\" title=\"title2\">text2</a>",
239/// "abc"
240/// ));
241/// assert_eq!(iter.next(), None);
242/// ```
243/// # Link content
244/// ## HTML
245///
246/// ```
247/// use parse_hyperlinks_extras::iterator_html::HtmlLinkInlineImage;
248/// use parse_hyperlinks::parser::Link;
249/// use std::borrow::Cow;
250///
251/// let i = r#"abc<img src="dest1" alt="text1">abc
252/// abc<a href="dest2" title="title2">text2</a>abc
253/// abc<a href="dest3" title="title3">cde<img alt="alt3" src="src3"/>fgh</a>abc
254/// "#;
255///
256/// let mut iter = HtmlLinkInlineImage::new(i);
257/// assert_eq!(iter.next().unwrap().1,
258/// Link::Image(Cow::from("text1"), Cow::from("dest1")));
259/// assert_eq!(iter.next().unwrap().1,
260/// Link::Text2Dest(Cow::from("text2"),
261/// Cow::from("dest2"),
262/// Cow::from("title2")));
263/// assert_eq!(iter.next().unwrap().1,
264/// Link::Image2Dest(Cow::from("cde"), Cow::from("alt3"), Cow::from("src3"),
265/// Cow::from("fgh"), Cow::from("dest3"), Cow::from("title3")));
266/// assert_eq!(iter.next(), None);
267/// ```
268///
269impl<'a> Iterator for HtmlLinkInlineImage<'a> {
270 type Item = ((&'a str, &'a str, &'a str), Link<'a>);
271 fn next(&mut self) -> Option<Self::Item> {
272 let mut output = None;
273
274 if let Ok((remaining_input, (skipped, link))) = take_link(self.input) {
275 let consumed = &self.input[skipped.len()..self.input.len() - remaining_input.len()];
276 // Assigning output.
277 output = Some(((skipped, consumed, remaining_input), link));
278 debug_assert_eq!(self.input, {
279 let mut s = "".to_string();
280 s.push_str(skipped);
281 s.push_str(consumed);
282 s.push_str(remaining_input);
283 s
284 });
285 self.input = remaining_input;
286 };
287 output
288 }
289}