1#![allow(dead_code)]
3#![allow(clippy::type_complexity)]
4
5use crate::parser::Link;
6use html_escape::decode_html_entities;
7use nom::AsChar;
8use nom::Parser;
9use nom::branch::alt;
10use nom::bytes::complete::is_not;
11use nom::bytes::complete::tag;
12use nom::character::complete::alphanumeric1;
13use nom::error::Error;
14use nom::error::ErrorKind;
15use std::borrow::Cow;
16
17pub fn html_text2dest_link(i: &'_ str) -> nom::IResult<&'_ str, Link<'_>> {
20 let (i, (te, de, ti)) = html_text2dest(i)?;
21 Ok((i, Link::Text2Dest(te, de, ti)))
22}
23
24pub fn html_text2dest(
40 i: &'_ str,
41) -> nom::IResult<&'_ str, (Cow<'_, str>, Cow<'_, str>, Cow<'_, str>)> {
42 let (i, ((link_destination, link_title), link_text)) = nom::sequence::terminated(
43 nom::sequence::pair(
44 tag_a_opening,
45 alt((
46 nom::bytes::complete::take_until("</a>"),
47 nom::bytes::complete::take_until("</A>"),
48 )),
49 ),
50 alt((tag("</a>"), tag("</A>"))),
53 )
54 .parse(i)?;
55 let link_text = decode_html_entities(link_text);
56 Ok((i, (link_text, link_destination, link_title)))
57}
58
59pub(crate) fn tag_a_opening(i: &'_ str) -> nom::IResult<&'_ str, (Cow<'_, str>, Cow<'_, str>)> {
62 nom::sequence::delimited(
63 alt((tag("<a "), tag("<A "))),
66 nom::combinator::map_parser(is_not(">"), parse_attributes),
67 tag(">"),
68 )
69 .parse(i)
70}
71
72fn attribute(i: &'_ str) -> nom::IResult<&'_ str, (&'_ str, Cow<'_, str>)> {
75 alt((
76 nom::sequence::pair(
77 nom::combinator::verify(alphanumeric1, |s: &str| {
78 s.chars().next().is_some_and(|c| c.is_alpha())
79 }),
80 alt((
81 nom::combinator::value(Cow::from(""), tag(r#"="""#)),
82 nom::combinator::value(Cow::from(""), tag(r#"=''"#)),
83 nom::combinator::map(
84 nom::sequence::delimited(tag("=\""), is_not("\""), tag("\"")),
85 |s: &str| decode_html_entities(s),
86 ),
87 nom::combinator::map(
88 nom::sequence::delimited(tag("='"), is_not("'"), tag("'")),
89 |s: &str| decode_html_entities(s),
90 ),
91 nom::combinator::map(nom::sequence::preceded(tag("="), is_not(" ")), |s: &str| {
92 decode_html_entities(s)
93 }),
94 )),
95 ),
96 nom::combinator::value(
98 ("", Cow::from("")),
99 nom::combinator::verify(alphanumeric1, |s: &str| {
100 s.chars().next().is_some_and(|c| c.is_alpha())
101 }),
102 ),
103 ))
104 .parse(i)
105}
106
107pub fn attribute_list(i: &'_ str) -> nom::IResult<&'_ str, Vec<(&'_ str, Cow<'_, str>)>> {
109 let i = i.trim();
110 nom::multi::separated_list1(nom::character::complete::multispace1, attribute).parse(i)
111}
112
113fn parse_attributes(i: &'_ str) -> nom::IResult<&'_ str, (Cow<'_, str>, Cow<'_, str>)> {
117 let (i, attributes) = attribute_list(i)?;
118 let mut href = Cow::Borrowed("");
119 let mut title = Cow::Borrowed("");
120
121 for (name, value) in attributes {
122 if name == "href" {
123 if !(*href).is_empty() {
126 return Err(nom::Err::Error(Error::new(name, ErrorKind::ManyMN)));
127 }
128 href = value;
129 } else if name == "title" {
130 if !(*title).is_empty() {
133 return Err(nom::Err::Error(Error::new(name, ErrorKind::ManyMN)));
134 }
135 title = value;
136 }
137 }
138
139 if (*href).is_empty() {
141 return Err(nom::Err::Error(Error::new(i, ErrorKind::Eof)));
142 };
143
144 Ok((i, (href, title)))
145}
146
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the `(link_text, link_destination, link_title)` triple that
    /// `html_text2dest` yields.
    fn triple<'a>(
        text: &'a str,
        dest: &'a str,
        title: &'a str,
    ) -> (Cow<'a, str>, Cow<'a, str>, Cow<'a, str>) {
        (Cow::from(text), Cow::from(dest), Cow::from(title))
    }

    /// Builds the recoverable nom error expected at `input` with kind `kind`.
    fn failure(input: &str, kind: nom::error::ErrorKind) -> nom::Err<nom::error::Error<&str>> {
        nom::Err::Error(nom::error::Error::new(input, kind))
    }

    #[test]
    fn test_html_text2dest() {
        // Lower-case and upper-case tag names give the same result.
        let w3s = (
            "abc",
            triple("W3Schools", "https://www.w3schools.com/", "W3S"),
        );
        for input in [
            r#"<a title="W3S" href="https://www.w3schools.com/">W3Schools</a>abc"#,
            r#"<A title="W3S" href="https://www.w3schools.com/">W3Schools</A>abc"#,
        ] {
            assert_eq!(html_text2dest(input).unwrap(), w3s);
        }

        // Markup-like link text.
        assert_eq!(
            html_text2dest(r#"<a title="t" href="h"><n></a>abc"#).unwrap(),
            ("abc", triple("<n>", "h", "t"))
        );

        // Empty title and trailing whitespace inside the opening tag.
        assert_eq!(
            html_text2dest(r#"<a href="url" title="" >name</a>abc"#).unwrap(),
            ("abc", triple("name", "url", ""))
        );

        // A `</` inside the link text does not end it.
        assert_eq!(
            html_text2dest(r#"<a href="url" title="" >na</me</A>abc"#).unwrap(),
            ("abc", triple("na</me", "url", ""))
        );

        // A whole tag is not an attribute list.
        let whole_tag = r#"<a href="url" title="" >name</a abc"#;
        assert_eq!(
            parse_attributes(whole_tag).unwrap_err(),
            failure(whole_tag, nom::error::ErrorKind::AlphaNumeric)
        );

        // Nested markup is returned verbatim as link text.
        let image_link = html_text2dest(
            "<a href=\"https://blog.getreu.net\">\
             <img src=\"w3html.gif\" alt=\"W3Schools.com \"\
             width=\"100\" height=\"132\">\
             </a>abc",
        )
        .unwrap();
        assert_eq!(
            image_link,
            (
                "abc",
                triple(
                    "<img src=\"w3html.gif\" alt=\"W3Schools.com \"width=\"100\" height=\"132\">",
                    "https://blog.getreu.net",
                    "",
                ),
            )
        );
    }

    #[test]
    fn test_tag_a_opening() {
        let blog = (
            "abc",
            (Cow::from("http://getreu.net"), Cow::from("My blog")),
        );
        for input in [
            r#"<a href="http://getreu.net" title="My blog">abc"#,
            r#"<A href="http://getreu.net" title="My blog">abc"#,
        ] {
            assert_eq!(tag_a_opening(input).unwrap(), blog);
        }
    }

    #[test]
    fn test_parse_attributes() {
        // Boolean attributes around `href` and `title` are ignored.
        let blog = ("", (Cow::from("http://getreu.net"), Cow::from("My blog")));
        assert_eq!(
            parse_attributes(r#"abc href="http://getreu.net" abc title="My blog" abc"#).unwrap(),
            blog
        );

        // `href` given twice.
        assert_eq!(
            parse_attributes(r#" href="http://getreu.net" href="http://blog.getreu.net" "#)
                .unwrap_err(),
            failure("href", nom::error::ErrorKind::ManyMN)
        );

        // `title` given twice.
        assert_eq!(
            parse_attributes(r#" href="http://getreu.net" title="a" title="b" "#).unwrap_err(),
            failure("title", nom::error::ErrorKind::ManyMN)
        );

        // `href` missing.
        assert_eq!(
            parse_attributes(r#" title="title" "#).unwrap_err(),
            failure("", nom::error::ErrorKind::Eof)
        );
    }

    #[test]
    fn test_attribute_list() {
        let parsed =
            attribute_list(r#"abc href="http://getreu.net" abc title="My blog" abc"#).unwrap();
        let entries = vec![
            ("", Cow::from("")),
            ("href", Cow::from("http://getreu.net")),
            ("", Cow::from("")),
            ("title", Cow::from("My blog")),
            ("", Cow::from("")),
        ];
        assert_eq!(parsed, ("", entries));
    }

    #[test]
    fn test_attribute() {
        // Double-quoted, single-quoted and unquoted values.
        let plain = (" abc", ("href", Cow::from("http://getreu.net")));
        for input in [
            r#"href="http://getreu.net" abc"#,
            r#"href='http://getreu.net' abc"#,
            r#"href=http://getreu.net abc"#,
        ] {
            assert_eq!(attribute(input).unwrap(), plain);
        }

        // The same three spellings with angle brackets in the value.
        let angled = (" abc", ("href", Cow::from("http://getreu.net/<>")));
        for input in [
            r#"href="http://getreu.net/<>" abc"#,
            r#"href='http://getreu.net/<>' abc"#,
            r#"href=http://getreu.net/<> abc"#,
        ] {
            assert_eq!(attribute(input).unwrap(), angled);
        }

        // A boolean attribute is consumed and reported as an empty pair.
        let boolean = (" abc", ("", Cow::from("")));
        assert_eq!(attribute("bool abc").unwrap(), boolean);

        // Names must not start with a digit.
        for input in ["1name", r#"1name="http://getreu.net"#] {
            assert_eq!(
                attribute(input).unwrap_err(),
                failure(input, nom::error::ErrorKind::Verify)
            );
        }
    }
}