parse_hyperlinks/parser/
wikitext.rs

1//! This module implements parsers for Wikitext hyperlinks.
2#![allow(dead_code)]
3#![allow(clippy::type_complexity)]
4
5use crate::parser::Link;
6use crate::parser::percent_decode;
7use nom::Parser;
8use nom::branch::alt;
9use nom::bytes::complete::is_not;
10use nom::bytes::complete::tag;
11use std::borrow::Cow;
12
13/// Wrapper around `wikitext_text2dest()` that packs the result in
14/// `Link::Text2Dest`.
15pub fn wikitext_text2dest_link(i: &'_ str) -> nom::IResult<&'_ str, Link<'_>> {
16    let (i, (te, de, ti)) = wikitext_text2dest(i)?;
17    Ok((i, Link::Text2Dest(te, de, ti)))
18}
19
20/// Parse an Wikitext _inline hyperlink_.
21///
22/// It returns either `Ok((i, (link_text, link_destination, Cow::from("")))`
23/// or some error.
24///
25/// The parser expects to start at the link start (`[`) to succeed.
26/// ```
27/// use parse_hyperlinks::parser::Link;
28/// use parse_hyperlinks::parser::wikitext::wikitext_text2dest;
29/// use std::borrow::Cow;
30///
31/// let expected = (
32///     "abc",
33///     (
34///         Cow::from("W3Schools"),
35///         Cow::from("https://www.w3schools.com/"),
36///         Cow::from(""),
37///     ),
38/// );
39/// assert_eq!(
40///     wikitext_text2dest("[https://www.w3schools.com/ W3Schools]abc").unwrap(),
41///     expected
42/// );
43/// ```
44pub fn wikitext_text2dest(
45    i: &'_ str,
46) -> nom::IResult<&'_ str, (Cow<'_, str>, Cow<'_, str>, Cow<'_, str>)> {
47    let (i, (link_text, link_destination)) = nom::sequence::delimited(
48        // HTML is case insensitive. XHTML, that is being XML is case sensitive.
49        // Here we deal with HTML.
50        tag("["),
51        nom::combinator::map_parser(is_not("]\n\r"), parse_inner),
52        tag("]"),
53    )
54    .parse(i)?;
55    Ok((i, (link_text, link_destination, Cow::from(""))))
56}
57
58/// Parse link destination and link text.
59fn parse_inner(i: &'_ str) -> nom::IResult<&'_ str, (Cow<'_, str>, Cow<'_, str>)> {
60    let (i, link_destination) = nom::sequence::terminated(
61        nom::combinator::map_parser(
62            nom::bytes::complete::take_till(|c| c == ' ' || c == '\t'),
63            parse_url,
64        ),
65        nom::character::complete::space0,
66    )
67    .parse(i)?;
68    let link_text = i;
69    Ok((i, (Cow::from(link_text), link_destination)))
70}
71
72/// Parse URL.
73fn parse_url(i: &'_ str) -> nom::IResult<&'_ str, Cow<'_, str>> {
74    nom::sequence::preceded(
75        nom::combinator::peek(alt((tag("http:"), tag("https:"), tag("mailto:")))),
76        percent_decode,
77    )
78    .parse(i)
79}
80
#[test]
fn test_wikitext_text2dest() {
    // Each row: (input, expected link text, expected link destination).
    // In every case the parser must stop right after `]`, leaving "abc".
    let cases = [
        (
            "[https://www.w3schools.com/ W3Schools]abc",
            "W3Schools",
            "https://www.w3schools.com/",
        ),
        // Several separating spaces.
        (
            "[https://www.w3schools.com/   W3Schools]abc",
            "W3Schools",
            "https://www.w3schools.com/",
        ),
        (
            "[http://www.w3schools.com/ W3Schools]abc",
            "W3Schools",
            "http://www.w3schools.com/",
        ),
        // Link text containing a space.
        (
            "[http://www.w3schools.com/ W3Schools website]abc",
            "W3Schools website",
            "http://www.w3schools.com/",
        ),
        // Tab as separator.
        (
            "[http://www.w3schools.com/\tW3Schools website]abc",
            "W3Schools website",
            "http://www.w3schools.com/",
        ),
        // No link text at all.
        (
            "[http://www.w3schools.com/]abc",
            "",
            "http://www.w3schools.com/",
        ),
        (
            "[http://www.w3schools.com/ ]abc",
            "",
            "http://www.w3schools.com/",
        ),
        (
            "[http://www.w3schools.com/\t ]abc",
            "",
            "http://www.w3schools.com/",
        ),
        (
            "[mailto:john.don@somemail.com John Don]abc",
            "John Don",
            "mailto:john.don@somemail.com",
        ),
    ];

    for &(input, text, dest) in &cases {
        let expected = ("abc", (Cow::from(text), Cow::from(dest), Cow::from("")));
        assert_eq!(
            wikitext_text2dest(input).unwrap(),
            expected,
            "input: {:?}",
            input
        );
    }

    // An unsupported scheme is rejected at the destination.
    assert_eq!(
        wikitext_text2dest("[httpx://www.w3schools.com/ W3Schools]abc").unwrap_err(),
        nom::Err::Error(nom::error::Error::new(
            "httpx://www.w3schools.com/",
            nom::error::ErrorKind::Tag
        ))
    );
}