parse_hyperlinks/parser/
wikitext.rs

1//! This module implements parsers for Wikitext hyperlinks.
2#![allow(dead_code)]
3#![allow(clippy::type_complexity)]
4
5use crate::parser::percent_decode;
6use crate::parser::Link;
7use nom::branch::alt;
8use nom::bytes::complete::is_not;
9use nom::bytes::complete::tag;
10use std::borrow::Cow;
11
12/// Wrapper around `wikitext_text2dest()` that packs the result in
13/// `Link::Text2Dest`.
14pub fn wikitext_text2dest_link(i: &str) -> nom::IResult<&str, Link> {
15    let (i, (te, de, ti)) = wikitext_text2dest(i)?;
16    Ok((i, Link::Text2Dest(te, de, ti)))
17}
18
19/// Parse an Wikitext _inline hyperlink_.
20///
21/// It returns either `Ok((i, (link_text, link_destination, Cow::from("")))`
22/// or some error.
23///
24/// The parser expects to start at the link start (`[`) to succeed.
25/// ```
26/// use parse_hyperlinks::parser::Link;
27/// use parse_hyperlinks::parser::wikitext::wikitext_text2dest;
28/// use std::borrow::Cow;
29///
30/// let expected = (
31///     "abc",
32///     (
33///         Cow::from("W3Schools"),
34///         Cow::from("https://www.w3schools.com/"),
35///         Cow::from(""),
36///     ),
37/// );
38/// assert_eq!(
39///     wikitext_text2dest("[https://www.w3schools.com/ W3Schools]abc").unwrap(),
40///     expected
41/// );
42/// ```
43pub fn wikitext_text2dest(i: &str) -> nom::IResult<&str, (Cow<str>, Cow<str>, Cow<str>)> {
44    let (i, (link_text, link_destination)) = nom::sequence::delimited(
45        // HTML is case insensitive. XHTML, that is being XML is case sensitive.
46        // Here we deal with HTML.
47        tag("["),
48        nom::combinator::map_parser(is_not("]\n\r"), parse_inner),
49        tag("]"),
50    )(i)?;
51    Ok((i, (link_text, link_destination, Cow::from(""))))
52}
53
54/// Parse link destination and link text.
55fn parse_inner(i: &str) -> nom::IResult<&str, (Cow<str>, Cow<str>)> {
56    let (i, link_destination) = nom::sequence::terminated(
57        nom::combinator::map_parser(
58            nom::bytes::complete::take_till(|c| c == ' ' || c == '\t'),
59            parse_url,
60        ),
61        nom::character::complete::space0,
62    )(i)?;
63    let link_text = i;
64    Ok((i, (Cow::from(link_text), link_destination)))
65}
66
67/// Parse URL.
68fn parse_url(i: &str) -> nom::IResult<&str, Cow<str>> {
69    nom::sequence::preceded(
70        nom::combinator::peek(alt((tag("http:"), tag("https:"), tag("mailto:")))),
71        percent_decode,
72    )(i)
73}
74
#[test]
fn test_wikitext_text2dest() {
    // Accepted links as `(input, expected link text, expected destination)`.
    // Every input is followed by `abc`, which must be left unparsed.
    let accepted = [
        (
            "[https://www.w3schools.com/ W3Schools]abc",
            "W3Schools",
            "https://www.w3schools.com/",
        ),
        // Several spaces between destination and text.
        (
            "[https://www.w3schools.com/   W3Schools]abc",
            "W3Schools",
            "https://www.w3schools.com/",
        ),
        (
            "[http://www.w3schools.com/ W3Schools]abc",
            "W3Schools",
            "http://www.w3schools.com/",
        ),
        // Link text containing a space.
        (
            "[http://www.w3schools.com/ W3Schools website]abc",
            "W3Schools website",
            "http://www.w3schools.com/",
        ),
        // Tab as separator.
        (
            "[http://www.w3schools.com/\tW3Schools website]abc",
            "W3Schools website",
            "http://www.w3schools.com/",
        ),
        // No link text at all, with and without trailing whitespace.
        (
            "[http://www.w3schools.com/]abc",
            "",
            "http://www.w3schools.com/",
        ),
        (
            "[http://www.w3schools.com/ ]abc",
            "",
            "http://www.w3schools.com/",
        ),
        (
            "[http://www.w3schools.com/\t ]abc",
            "",
            "http://www.w3schools.com/",
        ),
        (
            "[mailto:john.don@somemail.com John Don]abc",
            "John Don",
            "mailto:john.don@somemail.com",
        ),
    ];
    for &(input, link_text, link_destination) in &accepted {
        let expected = (
            "abc",
            (
                Cow::from(link_text),
                Cow::from(link_destination),
                Cow::from(""),
            ),
        );
        assert_eq!(wikitext_text2dest(input).unwrap(), expected);
    }

    // An unsupported scheme is rejected; the error carries the destination.
    let rejected = wikitext_text2dest("[httpx://www.w3schools.com/ W3Schools]abc");
    assert_eq!(
        rejected.unwrap_err(),
        nom::Err::Error(nom::error::Error::new(
            "httpx://www.w3schools.com/",
            nom::error::ErrorKind::Tag
        ))
    );
}