1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
//! This module implements parsers to extract hyperlinks and link reference
//! definitions from text input.

pub mod asciidoc;
pub mod html;
pub mod html_img;
pub mod markdown;
pub mod markdown_img;
pub mod parse;
pub mod restructured_text;
pub mod wikitext;
use nom::error::ErrorKind;
use percent_encoding::percent_decode_str;
use std::borrow::Cow;

/// A [hyperlink] with the following variants:
/// * an [inline link] `Text2Dest`,
/// * a [reference link] `Text2Label`,
/// * a [link reference definition] `Label2Dest`,
/// * a [combined inline link / link reference definition] `TextLabel2Dest`,
/// * a [reference alias] `Label2Label`,
/// * an [inline image] `Image` or
/// * an [inline link with embedded inline image] `Image2Dest`.
///
/// This is the main return type of this API.
///
/// The _link title_ in Markdown is optional, when not given the string is set
/// to the empty string `""`.  The back ticks \` in reStructuredText can be
/// omitted when only one word is enclosed without spaces.
///
/// [hyperlink]: https://spec.commonmark.org/0.30/#links
/// [inline link]: https://spec.commonmark.org/0.30/#inline-link
/// [reference link]: https://spec.commonmark.org/0.30/#reference-link
/// [link reference definition]: https://spec.commonmark.org/0.30/#link-reference-definition
/// [combined inline link / link reference definition]: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#hyperlink-references
/// [reference alias]: https://docutils.sourceforge.io/docs/ref/rst/restructuredtext.html#hyperlink-references
/// [inline image]: https://spec.commonmark.org/0.30/#images
/// [inline link with embedded inline image]: https://spec.commonmark.org/0.30/#example-519
#[derive(Debug, PartialEq, Clone)]
#[non_exhaustive]
pub enum Link<'a> {
    /// An _inline link_ with the following tuple values:
    /// ```text
    /// Text2Dest(link_text, link_destination, link_title)
    /// ```
    /// In (stand alone) **inline links** the destination and title are given
    /// immediately after the link text. When an _inline link_ is rendered, only
    /// the `link_text` is visible in the continuous text.
    /// * Markdown example:
    ///   ```md
    ///       [link_text](link_dest "link title")
    ///   ```
    /// * reStructuredText example:
    ///   ```rst
    ///       `link_text <link_dest>`__
    ///   ```
    /// *  Asciidoc example:
    ///    ```adoc
    ///    http://link_dest[link_text]
    ///    ```
    /// *  Wikitext example:
    ///    ```wm
    ///    [http://link_dest link_text]
    ///    ```
    Text2Dest(Cow<'a, str>, Cow<'a, str>, Cow<'a, str>),

    /// A _reference link_ with the following tuple values:
    /// ```text
    /// Text2Label(link_text, link_label)
    /// ```
    /// In **reference links** the destination and title are defined elsewhere
    /// in the document in some _link reference definition_. When a _reference
    /// link_ is rendered only `link_text` is visible.
    /// * Markdown examples:
    ///   ```md
    ///   [link_text][link_label]
    ///
    ///   [link_text]
    ///   ```
    ///   When only _link text_ is given, _link label_ is set to the same string.
    /// * reStructuredText examples:
    ///   ```rst
    ///   `link_text <link_label_>`_
    ///
    ///   `link_text`_
    ///   ```
    ///   When only _link text_ is given, _link label_ is set to the same string.
    /// * Asciidoc example:
    ///   ```adoc
    ///   {link_label}[link_text]
    ///   ```
    Text2Label(Cow<'a, str>, Cow<'a, str>),

    /// A _link reference definition_ with the following tuple values:
    /// ```text
    /// Label2Dest(link_label, link_destination, link_title)
    /// ```
    /// A **link reference definition** refers to a _reference link_ with the
    /// same _link label_. A _link reference definition_ is not visible
    /// when the document is rendered.
    /// _link title_ is optional.
    /// * Markdown example:
    ///   ```md
    ///   [link_label]: link_dest "link title"
    ///   ```
    /// * reStructuredText examples:
    ///   ```rst
    ///   .. _`link_label`: link_dest
    ///
    ///   .. __: link_dest
    ///
    ///   __ link_dest
    ///   ```
    ///   When `__` is given, the _link label_ is set to `"_"`, which is a marker
    ///   for an anonymous _link label_.
    /// * Asciidoc example:
    ///   ```adoc
    ///   :link_label: http://link_dest
    ///   ```
    Label2Dest(Cow<'a, str>, Cow<'a, str>, Cow<'a, str>),

    /// A combined _inline link / link reference definition_ with the
    /// following tuple values:
    /// ```text
    /// TextLabel2Dest(link_text_label, link_destination, link_title)
    /// ```
    /// This type represents a combined **inline link** and **link reference
    /// definition**. Semantically `TextLabel2Dest` is a shorthand for two links
    /// `Text2Dest` and `Label2Dest` in one object, where _link text_ and _link
    /// label_ are the same string. When rendered, _link text_ is visible.
    ///
    /// * Consider the following reStructuredText link:
    ///   ```rst
    ///   `link_text_label <link_dest>`_
    ///
    ///   `a <b>`_
    ///   ```
    ///   In this link `b` is the _link destination_ and `a` has a double role:
    ///   it defines _link text_ of the first link `Text2Dest("a", "b", "")` and
    ///   _link label_ of the second link `Label2Dest("a", "b", "")`.
    ///
    TextLabel2Dest(Cow<'a, str>, Cow<'a, str>, Cow<'a, str>),

    /// A _reference alias_ with the following tuple values:
    /// ```text
    /// Label2Label(alt_link_label, link_label)
    /// ```
    /// The **reference alias** defines an alternative link label
    /// `alt_link_label` for an existing `link_label` defined elsewhere in the
    /// document. At some point, the `link_label` must be resolved to a
    /// `link_destination` by a _link reference definition_. A _reference
    /// alias_ is not visible when the document is rendered.
    /// This link type is only available in reStructuredText, e.g.
    /// ```rst
    /// .. _`alt_link_label`: `link_label`_
    /// ```
    Label2Label(Cow<'a, str>, Cow<'a, str>),

    /// An _inline image_ with the following tuple values:
    /// ```text
    /// Image(img_alt, img_src)
    /// ```
    /// Note: this crate does not contain parsers for this variant.
    Image(Cow<'a, str>, Cow<'a, str>),

    /// An _inline link_ with embedded _inline image_ and the following
    /// tuple values:
    /// ```text
    /// Image2Dest(text1, img_alt, img_src, text2, dest, title)
    /// ```
    Image2Dest(
        Cow<'a, str>,
        Cow<'a, str>,
        Cow<'a, str>,
        Cow<'a, str>,
        Cow<'a, str>,
        Cow<'a, str>,
    ),
}

/// A parser that decodes percent encoded URLs.
///
/// This parser consumes all input: on success the returned remainder is
/// always empty. The decoded text is a `Cow::Borrowed` when `i` contains no
/// percent escapes and a `Cow::Owned` otherwise.
///
/// # Errors
///
/// Returns `nom::Err::Error` with `ErrorKind::EscapedTransform` when the
/// percent-decoded bytes are not well-formed UTF-8.
///
/// ```text
/// use std::borrow::Cow;
///
/// let res = percent_decode("https://getreu.net/?q=%5Ba%20b%5D").unwrap();
/// assert_eq!(res, ("", Cow::Owned("https://getreu.net/?q=[a b]".to_string())));
/// ```
fn percent_decode(i: &str) -> nom::IResult<&str, Cow<str>> {
    let decoded = percent_decode_str(i)
        .decode_utf8()
        .map_err(|_| nom::Err::Error(nom::error::Error::new(i, ErrorKind::EscapedTransform)))?;
    // Hand back the empty tail of `i` itself rather than an unrelated `""`
    // literal: combinators that compute the consumed length from pointer
    // offsets need the remainder to point into the original input.
    Ok((&i[i.len()..], decoded))
}

#[test]
fn test_percent_decode() {
    // Each case: (input, expected decoded text, whether decoding must allocate).
    let cases = [
        ("percent%20encoded string", "percent encoded string", true),
        ("nothing", "nothing", false),
    ];

    for (input, expected, allocates) in cases {
        let (_rest, decoded) = percent_decode(input).unwrap();
        if allocates {
            // An escape sequence was replaced, so a new `String` is required.
            assert!(matches!(decoded, Cow::Owned(..)));
        } else {
            // Nothing to decode: the input is passed through as a borrow.
            assert!(matches!(decoded, Cow::Borrowed(..)));
        }
        assert_eq!(decoded, Cow::from(expected));
    }
}