1#![allow(dead_code)]
3#![allow(clippy::type_complexity)]
4
5use crate::parser::Link;
6use html_escape::decode_html_entities;
7use nom::AsChar;
8use nom::Parser;
9use nom::branch::alt;
10use nom::bytes::complete::is_not;
11use nom::bytes::complete::tag;
12use nom::character::complete::alphanumeric1;
13use nom::error::Error;
14use nom::error::ErrorKind;
15use std::borrow::Cow;
16
17pub fn html_text2dest_link(i: &str) -> nom::IResult<&str, Link> {
20 let (i, (te, de, ti)) = html_text2dest(i)?;
21 Ok((i, Link::Text2Dest(te, de, ti)))
22}
23
24pub fn html_text2dest(i: &str) -> nom::IResult<&str, (Cow<str>, Cow<str>, Cow<str>)> {
40 let (i, ((link_destination, link_title), link_text)) = nom::sequence::terminated(
41 nom::sequence::pair(
42 tag_a_opening,
43 alt((
44 nom::bytes::complete::take_until("</a>"),
45 nom::bytes::complete::take_until("</A>"),
46 )),
47 ),
48 alt((tag("</a>"), tag("</A>"))),
51 )
52 .parse(i)?;
53 let link_text = decode_html_entities(link_text);
54 Ok((i, (link_text, link_destination, link_title)))
55}
56
57pub(crate) fn tag_a_opening(i: &str) -> nom::IResult<&str, (Cow<str>, Cow<str>)> {
60 nom::sequence::delimited(
61 alt((tag("<a "), tag("<A "))),
64 nom::combinator::map_parser(is_not(">"), parse_attributes),
65 tag(">"),
66 )
67 .parse(i)
68}
69
70fn attribute(i: &str) -> nom::IResult<&str, (&str, Cow<str>)> {
73 alt((
74 nom::sequence::pair(
75 nom::combinator::verify(alphanumeric1, |s: &str| {
76 s.chars().next().is_some_and(|c| c.is_alpha())
77 }),
78 alt((
79 nom::combinator::value(Cow::from(""), tag(r#"="""#)),
80 nom::combinator::value(Cow::from(""), tag(r#"=''"#)),
81 nom::combinator::map(
82 nom::sequence::delimited(tag("=\""), is_not("\""), tag("\"")),
83 |s: &str| decode_html_entities(s),
84 ),
85 nom::combinator::map(
86 nom::sequence::delimited(tag("='"), is_not("'"), tag("'")),
87 |s: &str| decode_html_entities(s),
88 ),
89 nom::combinator::map(nom::sequence::preceded(tag("="), is_not(" ")), |s: &str| {
90 decode_html_entities(s)
91 }),
92 )),
93 ),
94 nom::combinator::value(
96 ("", Cow::from("")),
97 nom::combinator::verify(alphanumeric1, |s: &str| {
98 s.chars().next().is_some_and(|c| c.is_alpha())
99 }),
100 ),
101 ))
102 .parse(i)
103}
104
105pub fn attribute_list(i: &str) -> nom::IResult<&str, Vec<(&str, Cow<str>)>> {
107 let i = i.trim();
108 nom::multi::separated_list1(nom::character::complete::multispace1, attribute).parse(i)
109}
110
111fn parse_attributes(i: &str) -> nom::IResult<&str, (Cow<str>, Cow<str>)> {
115 let (i, attributes) = attribute_list(i)?;
116 let mut href = Cow::Borrowed("");
117 let mut title = Cow::Borrowed("");
118
119 for (name, value) in attributes {
120 if name == "href" {
121 if !(*href).is_empty() {
124 return Err(nom::Err::Error(Error::new(name, ErrorKind::ManyMN)));
125 }
126 href = value;
127 } else if name == "title" {
128 if !(*title).is_empty() {
131 return Err(nom::Err::Error(Error::new(name, ErrorKind::ManyMN)));
132 }
133 title = value;
134 }
135 }
136
137 if (*href).is_empty() {
139 return Err(nom::Err::Error(Error::new(i, ErrorKind::Eof)));
140 };
141
142 Ok((i, (href, title)))
143}
144
// Unit tests for the HTML anchor parsers defined in this file.
#[cfg(test)]
mod tests {
    use super::*;

    // `html_text2dest` on complete `<a>...</a>` elements. The expected tuples
    // are `(remaining input, (link text, link destination, link title))`.
    #[test]
    fn test_html_text2dest() {
        // `title` may precede `href`; the output order stays the same.
        let expected = (
            "abc",
            (
                Cow::from("W3Schools"),
                Cow::from("https://www.w3schools.com/"),
                Cow::from("W3S"),
            ),
        );
        assert_eq!(
            html_text2dest(r#"<a title="W3S" href="https://www.w3schools.com/">W3Schools</a>abc"#)
                .unwrap(),
            expected
        );
        // Same input with the uppercase tag spelling.
        assert_eq!(
            html_text2dest(r#"<A title="W3S" href="https://www.w3schools.com/">W3Schools</A>abc"#)
                .unwrap(),
            expected
        );

        // Link text that itself looks like a tag.
        let expected = ("abc", (Cow::from("<n>"), Cow::from("h"), Cow::from("t")));
        assert_eq!(
            html_text2dest(r#"<a title="t" href="h"><n></a>abc"#).unwrap(),
            expected
        );

        // Empty title and whitespace before the closing `>`.
        let expected = ("abc", (Cow::from("name"), Cow::from("url"), Cow::from("")));
        assert_eq!(
            html_text2dest(r#"<a href="url" title="" >name</a>abc"#).unwrap(),
            expected
        );

        // `</me` is not a closing anchor tag and stays part of the link text.
        let expected = (
            "abc",
            (Cow::from("na</me"), Cow::from("url"), Cow::from("")),
        );
        assert_eq!(
            html_text2dest(r#"<a href="url" title="" >na</me</A>abc"#).unwrap(),
            expected
        );

        // `parse_attributes` expects the bare attribute section: an input
        // starting with `<` does not begin with an attribute name.
        let expected = nom::Err::Error(nom::error::Error::new(
            r#"<a href="url" title="" >name</a abc"#,
            nom::error::ErrorKind::AlphaNumeric,
        ));
        assert_eq!(
            parse_attributes(r#"<a href="url" title="" >name</a abc"#).unwrap_err(),
            expected
        );

        // A nested `<img>` element is returned verbatim as link text. The
        // `\` line continuations below join the literal without inserting
        // any whitespace.
        let expected = (
            "abc",
            (
                Cow::from(
                    "<img src=\"w3html.gif\" alt=\"W3Schools.com \"width=\"100\" height=\"132\">",
                ),
                Cow::from("https://blog.getreu.net"),
                Cow::from(""),
            ),
        );
        assert_eq!(
            html_text2dest(
                "<a href=\"https://blog.getreu.net\">\
                <img src=\"w3html.gif\" alt=\"W3Schools.com \"\
                width=\"100\" height=\"132\">\
                </a>abc"
            )
            .unwrap(),
            expected
        );
    }

    // `tag_a_opening` consumes the opening tag only and yields
    // `(href, title)`.
    #[test]
    fn test_tag_a_opening() {
        let expected = (
            "abc",
            (Cow::from("http://getreu.net"), Cow::from("My blog")),
        );
        assert_eq!(
            tag_a_opening(r#"<a href="http://getreu.net" title="My blog">abc"#).unwrap(),
            expected
        );
        // Uppercase tag name.
        assert_eq!(
            tag_a_opening(r#"<A href="http://getreu.net" title="My blog">abc"#).unwrap(),
            expected
        );
    }

    // `parse_attributes` picks `href` and `title` out of the attribute list
    // and reports duplicates and a missing `href` as errors.
    #[test]
    fn test_parse_attributes() {
        // The boolean attributes `abc` are ignored.
        let expected = ("", (Cow::from("http://getreu.net"), Cow::from("My blog")));
        assert_eq!(
            parse_attributes(r#"abc href="http://getreu.net" abc title="My blog" abc"#).unwrap(),
            expected
        );

        // Duplicate `href`: the error carries the attribute name.
        let expected = nom::Err::Error(nom::error::Error::new(
            "href",
            nom::error::ErrorKind::ManyMN,
        ));
        assert_eq!(
            parse_attributes(r#" href="http://getreu.net" href="http://blog.getreu.net" "#)
                .unwrap_err(),
            expected
        );

        // Duplicate `title`.
        let expected = nom::Err::Error(nom::error::Error::new(
            "title",
            nom::error::ErrorKind::ManyMN,
        ));
        assert_eq!(
            parse_attributes(r#" href="http://getreu.net" title="a" title="b" "#).unwrap_err(),
            expected
        );

        // Missing `href`.
        let expected = nom::Err::Error(nom::error::Error::new("", nom::error::ErrorKind::Eof));
        assert_eq!(
            parse_attributes(r#" title="title" "#).unwrap_err(),
            expected
        );
    }

    // `attribute_list` returns one item per attribute; boolean attributes
    // appear as `("", "")`.
    #[test]
    fn test_attribute_list() {
        let expected = (
            "",
            vec![
                ("", Cow::from("")),
                ("href", Cow::from("http://getreu.net")),
                ("", Cow::from("")),
                ("title", Cow::from("My blog")),
                ("", Cow::from("")),
            ],
        );
        assert_eq!(
            attribute_list(r#"abc href="http://getreu.net" abc title="My blog" abc"#).unwrap(),
            expected
        );
    }

    // `attribute` parses exactly one attribute and leaves the rest of the
    // input untouched.
    #[test]
    fn test_attribute() {
        // Double quoted, single quoted and unquoted values give the same
        // result.
        let expected = (" abc", ("href", Cow::from("http://getreu.net")));
        assert_eq!(
            attribute(r#"href="http://getreu.net" abc"#).unwrap(),
            expected
        );
        assert_eq!(
            attribute(r#"href='http://getreu.net' abc"#).unwrap(),
            expected
        );
        // Unquoted value.
        assert_eq!(
            attribute(r#"href=http://getreu.net abc"#).unwrap(),
            expected
        );

        // Angle brackets inside the value are accepted in all three
        // notations.
        let expected = (" abc", ("href", Cow::from("http://getreu.net/<>")));
        assert_eq!(
            attribute(r#"href="http://getreu.net/<>" abc"#).unwrap(),
            expected
        );
        assert_eq!(
            attribute(r#"href='http://getreu.net/<>' abc"#).unwrap(),
            expected
        );
        // Unquoted value.
        assert_eq!(
            attribute(r#"href=http://getreu.net/<> abc"#).unwrap(),
            expected
        );

        // Boolean attribute: name and value are both reported as empty.
        let expected = (" abc", ("", Cow::from("")));
        assert_eq!(attribute("bool abc").unwrap(), expected);

        // Attribute names must not start with a digit.
        let expected = nom::Err::Error(nom::error::Error::new(
            "1name",
            nom::error::ErrorKind::Verify,
        ));
        assert_eq!(attribute("1name").unwrap_err(), expected);

        let expected = nom::Err::Error(nom::error::Error::new(
            r#"1name="http://getreu.net"#,
            nom::error::ErrorKind::Verify,
        ));
        assert_eq!(
            attribute(r#"1name="http://getreu.net"#).unwrap_err(),
            expected
        );
    }
}