1#![allow(dead_code)]
3#![allow(clippy::type_complexity)]
4
5use crate::parser::Link;
6use html_escape::decode_html_entities;
7use nom::branch::alt;
8use nom::bytes::complete::is_not;
9use nom::bytes::complete::tag;
10use nom::character::complete::alphanumeric1;
11use nom::error::Error;
12use nom::error::ErrorKind;
13use std::borrow::Cow;
14
15pub fn html_text2dest_link(i: &str) -> nom::IResult<&str, Link> {
18 let (i, (te, de, ti)) = html_text2dest(i)?;
19 Ok((i, Link::Text2Dest(te, de, ti)))
20}
21
22pub fn html_text2dest(i: &str) -> nom::IResult<&str, (Cow<str>, Cow<str>, Cow<str>)> {
38 let (i, ((link_destination, link_title), link_text)) = nom::sequence::terminated(
39 nom::sequence::pair(
40 tag_a_opening,
41 alt((
42 nom::bytes::complete::take_until("</a>"),
43 nom::bytes::complete::take_until("</A>"),
44 )),
45 ),
46 alt((tag("</a>"), tag("</A>"))),
49 )(i)?;
50 let link_text = decode_html_entities(link_text);
51 Ok((i, (link_text, link_destination, link_title)))
52}
53
54pub(crate) fn tag_a_opening(i: &str) -> nom::IResult<&str, (Cow<str>, Cow<str>)> {
57 nom::sequence::delimited(
58 alt((tag("<a "), tag("<A "))),
61 nom::combinator::map_parser(is_not(">"), parse_attributes),
62 tag(">"),
63 )(i)
64}
65
66fn attribute(i: &str) -> nom::IResult<&str, (&str, Cow<str>)> {
69 alt((
70 nom::sequence::pair(
71 nom::combinator::verify(alphanumeric1, |s: &str| {
72 nom::character::is_alphabetic(s.as_bytes()[0])
73 }),
74 alt((
75 nom::combinator::value(Cow::from(""), tag(r#"="""#)),
76 nom::combinator::value(Cow::from(""), tag(r#"=''"#)),
77 nom::combinator::map(
78 nom::sequence::delimited(tag("=\""), is_not("\""), tag("\"")),
79 |s: &str| decode_html_entities(s),
80 ),
81 nom::combinator::map(
82 nom::sequence::delimited(tag("='"), is_not("'"), tag("'")),
83 |s: &str| decode_html_entities(s),
84 ),
85 nom::combinator::map(nom::sequence::preceded(tag("="), is_not(" ")), |s: &str| {
86 decode_html_entities(s)
87 }),
88 )),
89 ),
90 nom::combinator::value(
92 ("", Cow::from("")),
93 nom::combinator::verify(alphanumeric1, |s: &str| {
94 nom::character::is_alphabetic(s.as_bytes()[0])
95 }),
96 ),
97 ))(i)
98}
99
100pub fn attribute_list<'a>(i: &'a str) -> nom::IResult<&'a str, Vec<(&'a str, Cow<str>)>> {
102 let i = i.trim();
103 nom::multi::separated_list1(nom::character::complete::multispace1, attribute)(i)
104}
105
106fn parse_attributes(i: &str) -> nom::IResult<&str, (Cow<str>, Cow<str>)> {
110 let (i, attributes) = attribute_list(i)?;
111 let mut href = Cow::Borrowed("");
112 let mut title = Cow::Borrowed("");
113
114 for (name, value) in attributes {
115 if name == "href" {
116 if !(&*href).is_empty() {
119 return Err(nom::Err::Error(Error::new(name, ErrorKind::ManyMN)));
120 }
121 href = value;
122 } else if name == "title" {
123 if !(&*title).is_empty() {
126 return Err(nom::Err::Error(Error::new(name, ErrorKind::ManyMN)));
127 }
128 title = value;
129 }
130 }
131
132 if (&*href).is_empty() {
134 return Err(nom::Err::Error(Error::new(i, ErrorKind::Eof)));
135 };
136
137 Ok((i, (href, title)))
138}
139
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the recoverable nom error carrying `input` and `kind`.
    fn failure(input: &str, kind: nom::error::ErrorKind) -> nom::Err<nom::error::Error<&str>> {
        nom::Err::Error(nom::error::Error::new(input, kind))
    }

    /// Builds a borrowed `(text, destination, title)` triple.
    fn triple<'a>(
        text: &'a str,
        destination: &'a str,
        title: &'a str,
    ) -> (Cow<'a, str>, Cow<'a, str>, Cow<'a, str>) {
        (Cow::from(text), Cow::from(destination), Cow::from(title))
    }

    #[test]
    fn test_html_text2dest() {
        // Lower-case and upper-case tag names give the same result.
        let w3s = Ok((
            "abc",
            triple("W3Schools", "https://www.w3schools.com/", "W3S"),
        ));
        let inputs = [
            r#"<a title="W3S" href="https://www.w3schools.com/">W3Schools</a>abc"#,
            r#"<A title="W3S" href="https://www.w3schools.com/">W3Schools</A>abc"#,
        ];
        for &input in inputs.iter() {
            assert_eq!(html_text2dest(input), w3s);
        }

        assert_eq!(
            html_text2dest(r#"<a title="t" href="h"><n></a>abc"#),
            Ok(("abc", triple("<n>", "h", "t")))
        );

        // Empty title and trailing space in front of `>`.
        assert_eq!(
            html_text2dest(r#"<a href="url" title="" >name</a>abc"#),
            Ok(("abc", triple("name", "url", "")))
        );

        // `</` inside the link text does not end the link.
        assert_eq!(
            html_text2dest(r#"<a href="url" title="" >na</me</A>abc"#),
            Ok(("abc", triple("na</me", "url", "")))
        );

        // A complete tag is not a valid attribute list.
        let not_attributes = r#"<a href="url" title="" >name</a abc"#;
        assert_eq!(
            parse_attributes(not_attributes),
            Err(failure(not_attributes, nom::error::ErrorKind::AlphaNumeric))
        );

        // Nested markup is returned verbatim as link text.
        assert_eq!(
            html_text2dest(
                "<a href=\"https://blog.getreu.net\">\
                 <img src=\"w3html.gif\" alt=\"W3Schools.com \"\
                 width=\"100\" height=\"132\">\
                 </a>abc"
            ),
            Ok((
                "abc",
                triple(
                    "<img src=\"w3html.gif\" alt=\"W3Schools.com \"width=\"100\" height=\"132\">",
                    "https://blog.getreu.net",
                    "",
                ),
            ))
        );
    }

    #[test]
    fn test_tag_a_opening() {
        let blog = Ok((
            "abc",
            (Cow::from("http://getreu.net"), Cow::from("My blog")),
        ));
        let inputs = [
            r#"<a href="http://getreu.net" title="My blog">abc"#,
            r#"<A href="http://getreu.net" title="My blog">abc"#,
        ];
        for &input in inputs.iter() {
            assert_eq!(tag_a_opening(input), blog);
        }
    }

    #[test]
    fn test_parse_attributes() {
        // Boolean attributes in between are ignored.
        assert_eq!(
            parse_attributes(r#"abc href="http://getreu.net" abc title="My blog" abc"#),
            Ok(("", (Cow::from("http://getreu.net"), Cow::from("My blog"))))
        );

        // Duplicate `href`.
        assert_eq!(
            parse_attributes(r#" href="http://getreu.net" href="http://blog.getreu.net" "#),
            Err(failure("href", nom::error::ErrorKind::ManyMN))
        );

        // Duplicate `title`.
        assert_eq!(
            parse_attributes(r#" href="http://getreu.net" title="a" title="b" "#),
            Err(failure("title", nom::error::ErrorKind::ManyMN))
        );

        // Missing `href`.
        assert_eq!(
            parse_attributes(r#" title="title" "#),
            Err(failure("", nom::error::ErrorKind::Eof))
        );
    }

    #[test]
    fn test_attribute_list() {
        let boolean = || ("", Cow::from(""));
        assert_eq!(
            attribute_list(r#"abc href="http://getreu.net" abc title="My blog" abc"#),
            Ok((
                "",
                vec![
                    boolean(),
                    ("href", Cow::from("http://getreu.net")),
                    boolean(),
                    ("title", Cow::from("My blog")),
                    boolean(),
                ],
            ))
        );
    }

    #[test]
    fn test_attribute() {
        // Double-quoted, single-quoted and unquoted values.
        let plain = Ok((" abc", ("href", Cow::from("http://getreu.net"))));
        let inputs = [
            r#"href="http://getreu.net" abc"#,
            r#"href='http://getreu.net' abc"#,
            r#"href=http://getreu.net abc"#,
        ];
        for &input in inputs.iter() {
            assert_eq!(attribute(input), plain);
        }

        // The same three forms with `<>` inside the value.
        let with_brackets = Ok((" abc", ("href", Cow::from("http://getreu.net/<>"))));
        let inputs = [
            r#"href="http://getreu.net/<>" abc"#,
            r#"href='http://getreu.net/<>' abc"#,
            r#"href=http://getreu.net/<> abc"#,
        ];
        for &input in inputs.iter() {
            assert_eq!(attribute(input), with_brackets);
        }

        // Boolean attribute: name and value are reported empty.
        assert_eq!(attribute("bool abc"), Ok((" abc", ("", Cow::from("")))));

        // Attribute names must not start with a digit.
        let inputs = ["1name", r#"1name="http://getreu.net"#];
        for &input in inputs.iter() {
            assert_eq!(
                attribute(input),
                Err(failure(input, nom::error::ErrorKind::Verify))
            );
        }
    }
}