parse_hyperlinks/parser/
wikitext.rs

1//! This module implements parsers for Wikitext hyperlinks.
2#![allow(dead_code)]
3#![allow(clippy::type_complexity)]
4
5use crate::parser::Link;
6use crate::parser::percent_decode;
7use nom::Parser;
8use nom::branch::alt;
9use nom::bytes::complete::is_not;
10use nom::bytes::complete::tag;
11use std::borrow::Cow;
12
13/// Wrapper around `wikitext_text2dest()` that packs the result in
14/// `Link::Text2Dest`.
15pub fn wikitext_text2dest_link(i: &str) -> nom::IResult<&str, Link> {
16    let (i, (te, de, ti)) = wikitext_text2dest(i)?;
17    Ok((i, Link::Text2Dest(te, de, ti)))
18}
19
20/// Parse an Wikitext _inline hyperlink_.
21///
22/// It returns either `Ok((i, (link_text, link_destination, Cow::from("")))`
23/// or some error.
24///
25/// The parser expects to start at the link start (`[`) to succeed.
26/// ```
27/// use parse_hyperlinks::parser::Link;
28/// use parse_hyperlinks::parser::wikitext::wikitext_text2dest;
29/// use std::borrow::Cow;
30///
31/// let expected = (
32///     "abc",
33///     (
34///         Cow::from("W3Schools"),
35///         Cow::from("https://www.w3schools.com/"),
36///         Cow::from(""),
37///     ),
38/// );
39/// assert_eq!(
40///     wikitext_text2dest("[https://www.w3schools.com/ W3Schools]abc").unwrap(),
41///     expected
42/// );
43/// ```
44pub fn wikitext_text2dest(i: &str) -> nom::IResult<&str, (Cow<str>, Cow<str>, Cow<str>)> {
45    let (i, (link_text, link_destination)) = nom::sequence::delimited(
46        // HTML is case insensitive. XHTML, that is being XML is case sensitive.
47        // Here we deal with HTML.
48        tag("["),
49        nom::combinator::map_parser(is_not("]\n\r"), parse_inner),
50        tag("]"),
51    )
52    .parse(i)?;
53    Ok((i, (link_text, link_destination, Cow::from(""))))
54}
55
56/// Parse link destination and link text.
57fn parse_inner(i: &str) -> nom::IResult<&str, (Cow<str>, Cow<str>)> {
58    let (i, link_destination) = nom::sequence::terminated(
59        nom::combinator::map_parser(
60            nom::bytes::complete::take_till(|c| c == ' ' || c == '\t'),
61            parse_url,
62        ),
63        nom::character::complete::space0,
64    )
65    .parse(i)?;
66    let link_text = i;
67    Ok((i, (Cow::from(link_text), link_destination)))
68}
69
70/// Parse URL.
71fn parse_url(i: &str) -> nom::IResult<&str, Cow<str>> {
72    nom::sequence::preceded(
73        nom::combinator::peek(alt((tag("http:"), tag("https:"), tag("mailto:")))),
74        percent_decode,
75    )
76    .parse(i)
77}
78
#[test]
fn test_wikitext_text2dest() {
    // Each row: (input, expected link text, expected link destination).
    // For every row the remaining input must be "abc" and the title empty.
    let accepted = [
        // One or several spaces between destination and text.
        (
            r#"[https://www.w3schools.com/ W3Schools]abc"#,
            "W3Schools",
            "https://www.w3schools.com/",
        ),
        (
            r#"[https://www.w3schools.com/   W3Schools]abc"#,
            "W3Schools",
            "https://www.w3schools.com/",
        ),
        (
            r#"[http://www.w3schools.com/ W3Schools]abc"#,
            "W3Schools",
            "http://www.w3schools.com/",
        ),
        // Link text containing a space; space or tab as separator.
        (
            r#"[http://www.w3schools.com/ W3Schools website]abc"#,
            "W3Schools website",
            "http://www.w3schools.com/",
        ),
        (
            "[http://www.w3schools.com/\tW3Schools website]abc",
            "W3Schools website",
            "http://www.w3schools.com/",
        ),
        // Missing or whitespace-only link text.
        (
            r#"[http://www.w3schools.com/]abc"#,
            "",
            "http://www.w3schools.com/",
        ),
        (
            r#"[http://www.w3schools.com/ ]abc"#,
            "",
            "http://www.w3schools.com/",
        ),
        (
            "[http://www.w3schools.com/\t ]abc",
            "",
            "http://www.w3schools.com/",
        ),
        // `mailto:` destination.
        (
            r#"[mailto:john.don@somemail.com John Don]abc"#,
            "John Don",
            "mailto:john.don@somemail.com",
        ),
    ];

    for &(input, text, destination) in accepted.iter() {
        let expected = (
            "abc",
            (Cow::from(text), Cow::from(destination), Cow::from("")),
        );
        assert_eq!(
            wikitext_text2dest(input).unwrap(),
            expected,
            "input: {}",
            input
        );
    }

    // An unknown scheme is rejected; the error points at the destination.
    let rejected =
        wikitext_text2dest(r#"[httpx://www.w3schools.com/ W3Schools]abc"#).unwrap_err();
    assert_eq!(
        rejected,
        nom::Err::Error(nom::error::Error::new(
            "httpx://www.w3schools.com/",
            nom::error::ErrorKind::Tag
        ))
    );
}
165}