parse_hyperlinks/parser/
markdown.rs

1//! This module implements parsers for Markdown hyperlinks.
2#![allow(dead_code)]
3#![allow(clippy::type_complexity)]
4
5use crate::parser::Link;
6use crate::parser::parse::LABEL_LEN_MAX;
7use crate::parser::percent_decode;
8use crate::take_until_unbalanced;
9use nom::branch::alt;
10use nom::bytes::complete::tag;
11use nom::character::complete::multispace1;
12use nom::{Parser, combinator::*};
13use std::borrow::Cow;
14
/// The following characters are escapable in _link text_, _link label_, _link
/// destination_ and _link title_.
const ESCAPABLE: &str = r###"!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"###;
18
19/// Wrapper around `md_text2dest()` that packs the result in
20/// `Link::Text2Dest`.
21pub fn md_text2dest_link(i: &'_ str) -> nom::IResult<&'_ str, Link<'_>> {
22    let (i, (te, de, ti)) = md_text2dest(i)?;
23    Ok((i, Link::Text2Dest(te, de, ti)))
24}
25
26/// Parses a Markdown _inline link_.
27///
28/// This parser expects to start at the beginning of the link `[` to succeed.
29/// ```
30/// use parse_hyperlinks::parser::Link;
31/// use parse_hyperlinks::parser::markdown::md_text2dest;
32/// use std::borrow::Cow;
33///
34/// assert_eq!(
35///   md_text2dest(r#"[text](<dest> "title")abc"#),
36///   Ok(("abc", (Cow::from("text"), Cow::from("dest"), Cow::from("title"))))
37/// );
38///
39/// assert_eq!(
40///   md_text2dest(r#"<scheme:dest>abc"#),
41///   Ok(("abc", (Cow::from("scheme:dest"), Cow::from("scheme:dest"), Cow::from(""))))
42/// );
43/// assert_eq!(
44///   md_text2dest(r#"<foo@dest>abc"#),
45///   Ok(("abc", (Cow::from("foo@dest"), Cow::from("mailto:foo@dest"), Cow::from(""))))
46/// );
47/// ```
48pub fn md_text2dest(
49    i: &'_ str,
50) -> nom::IResult<&'_ str, (Cow<'_, str>, Cow<'_, str>, Cow<'_, str>)> {
51    alt((
52        // Parse autolink.
53        nom::sequence::delimited(
54            tag("<"),
55            map_parser(
56                nom::bytes::complete::take_till1(|c: char| {
57                    c.is_ascii_whitespace() || c == '>' || c == '<'
58                }),
59                alt((md_absolute_uri, md_email_address)),
60            ),
61            tag(">"),
62        ),
63        // Parse inline link.
64        map(
65            (md_link_text, md_link_destination_enclosed),
66            |(a, (b, c))| (a, b, c),
67        ),
68    ))
69    .parse(i)
70}
71
72/// Wrapper around `md_label2dest()` that packs the result in
73/// `Link::Label2Dest`.
74pub fn md_label2dest_link(i: &'_ str) -> nom::IResult<&'_ str, Link<'_>> {
75    let (i, (l, d, t)) = md_label2dest(i)?;
76    Ok((i, Link::Label2Dest(l, d, t)))
77}
78
79/// Matches a Markdown _link reference definition_.
80///
81/// The caller must guarantee, that the parser starts at first character of the
82/// input or at the first character of a line. The parser consumes all bytes
83/// until the end of the line.
84/// ```
85/// use parse_hyperlinks::parser::Link;
86/// use parse_hyperlinks::parser::markdown::md_label2dest;
87/// use std::borrow::Cow;
88///
89/// assert_eq!(
90///   md_label2dest("   [label]: <destination> 'title'\nabc"),
91///   Ok(("\nabc", (Cow::from("label"), Cow::from("destination"), Cow::from("title"))))
92/// );
93/// ```
94///
95/// [CommonMark
96/// Spec](https://spec.commonmark.org/0.30/#link-reference-definition)\ A [link
97/// reference
98/// definition](https://spec.commonmark.org/0.30/#link-reference-definition)
99/// consists of a [link label](https://spec.commonmark.org/0.30/#link-label),
100/// optionally preceded by up to three spaces of indentation, followed by a
101/// colon (`:`), optional spaces or tabs (including up to one [line
102/// ending](https://spec.commonmark.org/0.30/#line-ending)), a [link
103/// destination](https://spec.commonmark.org/0.30/#link-destination), optional
104/// spaces or tabs (including up to one [line
105/// ending](https://spec.commonmark.org/0.30/#line-ending)), and an optional
106/// [link title](https://spec.commonmark.org/0.30/#link-title), which if it is
107/// present must be separated from the [link
108/// destination](https://spec.commonmark.org/0.30/#link-destination) by spaces
109/// or tabs. No further character may occur.
110///
111/// A [link reference
112/// definition](https://spec.commonmark.org/0.30/#link-reference-definition)
113/// does not correspond to a structural element of a document. Instead, it
114/// defines a label which can be used in [reference
115/// links](https://spec.commonmark.org/0.30/#reference-link) and reference-style
116/// [images](https://spec.commonmark.org/0.30/#images) elsewhere in the
117/// document. [Link reference
118/// definitions](https://spec.commonmark.org/0.30/#link-reference-definition)
119/// can come either before or after the links that use them.
120pub fn md_label2dest(
121    i: &'_ str,
122) -> nom::IResult<&'_ str, (Cow<'_, str>, Cow<'_, str>, Cow<'_, str>)> {
123    // Consume up to three spaces.
124    let (i, _) = nom::bytes::complete::take_while_m_n(0, 3, |c| c == ' ')(i)?;
125    // Take label.
126    let (i, link_text) = md_link_label(i)?;
127    let (i, _) = nom::character::complete::char(':')(i)?;
128    // Take spaces.
129    let (i, _) = verify(nom::character::complete::multispace1, |s: &str| {
130        !s.contains("\n\n")
131    })
132    .parse(i)?;
133    // Take destination.
134    let (i, link_destination) = md_link_destination(i)?;
135    // Try, but do not fail.
136    let (i, link_title) = alt((
137        // Take link title.
138        md_link_title,
139        nom::combinator::success(Cow::from("")),
140    ))
141    .parse(i)?;
142
143    // Now consume as much whitespace as possible.
144    let (i, _) = nom::character::complete::space0(i)?;
145
146    // Check if there is newline coming. Do not consume.
147    if !i.is_empty() {
148        let _ = nom::character::complete::newline(i)?;
149    }
150
151    Ok((i, (link_text, link_destination, link_title)))
152}
153
154/// Wrapper around `md_text2label()` that packs the result in
155/// `Link::Text2Label`.
156pub fn md_text2label_link(i: &'_ str) -> nom::IResult<&'_ str, Link<'_>> {
157    let (i, (t, l)) = md_text2label(i)?;
158    Ok((i, Link::Text2Label(t, l)))
159}
160
161/// Parse a Markdown _reference link_.
162///
163/// There are three kinds of reference links: full, collapsed, and shortcut.
164/// 1. A full reference link consists of a link text immediately followed by a
165///    link label that matches a link reference definition elsewhere in the
166///    document.
167/// 2. A collapsed reference link consists of a link label that matches a link
168///    reference definition elsewhere in the document, followed by the string [].
169///    The contents of the first link label are parsed as inlines, which are used as
170///    the link’s text. The link’s URI and title are provided by the matching
171///    reference link definition. Thus, `[foo][]` is equivalent to `[foo][foo]`.
172/// 3. A shortcut reference link consists of a link label that matches a link
173///    reference definition elsewhere in the document and is not followed by [] or a
174///    link label. The contents of the first link label are parsed as inlines, which
175///    are used as the link’s text. The link’s URI and title are provided by the
176///    matching link reference definition. Thus, `[foo]` is equivalent to `[foo][]`.
177///
178/// This parser expects to start at the beginning of the link `[` to succeed.
179/// It should always run at last position after all other parsers.
180/// ```rust
181/// use parse_hyperlinks::parser::Link;
182/// use parse_hyperlinks::parser::markdown::md_text2label;
183/// use std::borrow::Cow;
184///
185/// assert_eq!(
186///   md_text2label("[link text][link label]abc"),
187///   Ok(("abc", (Cow::from("link text"), Cow::from("link label"))))
188/// );
189/// assert_eq!(
190///   md_text2label("[link text][]abc"),
191///   Ok(("abc", (Cow::from("link text"), Cow::from("link text"))))
192/// );
193/// assert_eq!(
194///   md_text2label("[link text]abc"),
195///   Ok(("abc", (Cow::from("link text"), Cow::from("link text"))))
196/// );
197/// ```
198pub fn md_text2label(i: &'_ str) -> nom::IResult<&'_ str, (Cow<'_, str>, Cow<'_, str>)> {
199    let (i, (link_text, link_label)) = alt((
200        nom::sequence::pair(md_link_text, md_link_label),
201        nom::combinator::map(nom::sequence::terminated(md_link_text, tag("[]")), |s| {
202            (s.clone(), s)
203        }),
204        nom::combinator::map(md_link_text, |s| (s.clone(), s)),
205    ))
206    .parse(i)?;
207
208    // Check that there is no `[` or `(` following. Do not consume.
209    if !i.is_empty() {
210        let _ = nom::character::complete::none_of("[(")(i)?;
211    }
212
213    Ok((i, (link_text, link_label)))
214}
215
216/// Parses _link text_.
217/// Brackets are allowed in the
218/// [link text](https://spec.commonmark.org/0.29/#link-text) only if (a) they are
219/// backslash-escaped or (b) they appear as a matched pair of brackets, with
220/// an open bracket `[`, a sequence of zero or more inlines, and a close
221/// bracket `]`.
222/// [CommonMark Spec](https://spec.commonmark.org/0.29/#link-text)
223pub(crate) fn md_link_text(i: &'_ str) -> nom::IResult<&'_ str, Cow<'_, str>> {
224    nom::combinator::map_parser(
225        nom::sequence::delimited(tag("["), take_until_unbalanced('[', ']'), tag("]")),
226        md_escaped_str_transform,
227    )
228    .parse(i)
229}
230
231/// Parses a _link label_.
232/// A link label begins with a left bracket ([) and ends with the first right
233/// bracket (]) that is not backslash-escaped. Between these brackets there must
234/// be at least one non-whitespace character. Unescaped square bracket characters
235/// are not allowed inside the opening and closing square brackets of link
236/// labels. A link label can have at most 999 characters inside the square
237/// brackets (TODO).
238/// [CommonMark Spec](https://spec.commonmark.org/0.29/#link-label)
239fn md_link_label(i: &'_ str) -> nom::IResult<&'_ str, Cow<'_, str>> {
240    nom::combinator::map_parser(
241        nom::combinator::verify(
242            nom::sequence::delimited(
243                tag("["),
244                nom::bytes::complete::escaped(
245                    nom::character::complete::none_of("\\[]"),
246                    '\\',
247                    nom::character::complete::one_of(ESCAPABLE),
248                ),
249                tag("]"),
250            ),
251            |l: &str| l.len() <= LABEL_LEN_MAX,
252        ),
253        md_escaped_str_transform,
254    )
255    .parse(i)
256}
257
258/// This is a wrapper around `md_parse_link_destination()`. It takes its result
259/// and removes the `\` before the escaped characters `ESCAPABLE`.
260pub(crate) fn md_link_destination(i: &'_ str) -> nom::IResult<&'_ str, Cow<'_, str>> {
261    nom::combinator::map_parser(md_parse_link_destination, md_escaped_str_transform).parse(i)
262}
263
264/// A [link destination](https://spec.commonmark.org/0.30/#link-destination)
265/// consists of either:
266///
267/// * a sequence of zero or more characters between an opening `<` and a
268///   closing `>` that contains no line endings or unescaped `<` or `>`
269///   characters, or
270/// * a nonempty sequence of characters that does not start with `<`, does not
271///   include [ASCII control
272///   characters](https://spec.commonmark.org/0.30/#ascii-control-character) or
273///   [space](https://spec.commonmark.org/0.30/#space) character, and includes
274///   parentheses only if (a) they are backslash-escaped or (b) they are part
275///   of a balanced pair of unescaped parentheses. (Implementations may impose
276///   limits on parentheses nesting to avoid performance issues, but at least
277///   three levels of nesting should be supported.)
278fn md_parse_link_destination(i: &str) -> nom::IResult<&str, &str> {
279    alt((
280        nom::sequence::delimited(
281            tag("<"),
282            nom::bytes::complete::escaped(
283                nom::character::complete::none_of(r#"\<>"#),
284                '\\',
285                nom::character::complete::one_of(ESCAPABLE),
286            ),
287            tag(">"),
288        ),
289        map(nom::bytes::complete::tag("<>"), |_| ""),
290        alt((
291            nom::bytes::complete::is_not(" \t\r\n"),
292            nom::combinator::success(""),
293        )),
294    ))
295    .parse(i)
296}
297
298/// Matches `md_link_destination` in parenthesis.
299pub(crate) fn md_link_destination_enclosed(
300    i: &'_ str,
301) -> nom::IResult<&'_ str, (Cow<'_, str>, Cow<'_, str>)> {
302    map_parser(
303        nom::sequence::delimited(tag("("), take_until_unbalanced('(', ')'), tag(")")),
304        (
305            md_link_destination,
306            alt((
307                // Take link title.
308                md_link_title,
309                nom::combinator::success(Cow::from("")),
310            )),
311        ),
312    )
313    .parse(i)
314}
315
316/// This is a wrapper around `md_parse_link_title()`. It takes its result
317/// and removes the `\` before the escaped characters `ESCAPABLE`.
318fn md_link_title(i: &'_ str) -> nom::IResult<&'_ str, Cow<'_, str>> {
319    nom::combinator::map_parser(md_parse_link_title, md_escaped_str_transform).parse(i)
320}
321
322/// A link title is always preceded one or more whitespace inluding
323/// one newline.
324/// [CommonMark Spec](https://spec.commonmark.org/0.29/#link-title)
325/// A [link title](https://spec.commonmark.org/0.29/#link-title) consists of either
326///
327///  - a sequence of zero or more characters between straight double-quote
328///    characters (`"`), including a `"` character only if it is
329///    backslash-escaped, or
330///  - a sequence of zero or more characters between straight single-quote
331///    characters (`'`), including a `'` character only if it is
332///    backslash-escaped, or
333///  - a sequence of zero or more characters between matching parentheses
334///    (`(...)`), including a `(` or `)` character only if it is
335///    backslash-escaped.
336///
337///  Although [link titles](https://spec.commonmark.org/0.29/#link-title) may
338///  span multiple lines, they may not contain a [blank
339///  line](https://spec.commonmark.org/0.29/#blank-line).
340fn md_parse_link_title(i: &str) -> nom::IResult<&str, &str> {
341    nom::sequence::preceded(
342        verify(multispace1, |s: &str| !s.contains("\n\n")),
343        verify(
344            alt((
345                nom::sequence::delimited(tag("("), take_until_unbalanced('(', ')'), tag(")")),
346                nom::sequence::delimited(
347                    tag("'"),
348                    nom::bytes::complete::escaped(
349                        nom::character::complete::none_of(r#"\'"#),
350                        '\\',
351                        nom::character::complete::one_of(ESCAPABLE),
352                    ),
353                    tag("'"),
354                ),
355                nom::sequence::delimited(
356                    tag("\""),
357                    nom::bytes::complete::escaped(
358                        nom::character::complete::none_of(r#"\""#),
359                        '\\',
360                        nom::character::complete::one_of(ESCAPABLE),
361                    ),
362                    tag("\""),
363                ),
364            )),
365            |s: &str| !s.contains("\n\n"),
366        ),
367    )
368    .parse(i)
369}
370
371/// Remove the `\` before the escaped characters `ESCAPABLE`.
372fn md_escaped_str_transform(i: &'_ str) -> nom::IResult<&'_ str, Cow<'_, str>> {
373    nom::combinator::map(
374        nom::bytes::complete::escaped_transform(
375            nom::bytes::complete::is_not("\\"),
376            '\\',
377            nom::character::complete::one_of(ESCAPABLE),
378        ),
379        |s| if s == i { Cow::from(i) } else { Cow::from(s) },
380    )
381    .parse(i)
382}
383
384/// Parses an [absolute URI](https://spec.commonmark.org/0.30/#absolute-uri).
385/// This parser consumes all input to succeed.
386/// An absolute URI, for these purposes, consists of a
387/// [scheme](https://spec.commonmark.org/0.30/#scheme) followed by a
388/// colon (`:`) followed by zero or more characters other [ASCII control
389/// characters](https://spec.commonmark.org/0.30/#ascii-control-character),
390/// [space](https://spec.commonmark.org/0.30/#space), `<`, and `>`. If the
391/// URI includes these characters, they must be percent-encoded (e.g. `%20`
392/// for a space).
393///
394/// For purposes of this spec, a
395/// [scheme](https://spec.commonmark.org/0.30/#scheme) is any sequence of
396/// 2–32 characters beginning with an ASCII letter and followed by any
397/// combination of ASCII letters, digits, or the symbols plus (”+”),
398/// period (”.”), or hyphen (”-”).
399///
400/// [CommonMark Spec](https://spec.commonmark.org/0.30/#autolinks)
401fn md_absolute_uri(
402    i: &'_ str,
403) -> nom::IResult<&'_ str, (Cow<'_, str>, Cow<'_, str>, Cow<'_, str>)> {
404    let j = i;
405    map(
406        all_consuming(nom::sequence::separated_pair(
407            // Parse scheme.
408            verify(
409                nom::bytes::complete::take_till1(|c: char| {
410                    !(c.is_ascii_alphanumeric() || "+.-".contains(c))
411                }),
412                |s: &str| s.len() >= 2 && s.len() <= 32,
413            ),
414            tag(":"),
415            // Parse domain.
416            map_parser(
417                nom::bytes::complete::take_till1(|c: char| {
418                    c.is_ascii_control() || c.is_ascii_whitespace() || "<>".contains(c)
419                }),
420                percent_decode,
421            ),
422        )),
423        |(scheme, domain)| {
424            let uri = if matches!(domain, Cow::Borrowed(..)) {
425                Cow::Borrowed(j)
426            } else {
427                Cow::Owned(format!("{scheme}:{domain}"))
428            };
429            (uri.clone(), uri, Cow::from(""))
430        },
431    )
432    .parse(i)
433}
434
435/// Parses an Email address. This parser consumes all input to succeed.
436/// As it only checks and forwards, the result type is `Cow::Borrowed`.
437/// The check is not as strict but inspired by
438/// [HTML5 spec](https://html.spec.whatwg.org/multipage/forms.html#e-mail-state-(type=email)):
439/// and [CommonMark Spec](https://spec.commonmark.org/0.30/#email-autolink)
440/// The link’s label is the email address, and the
441/// URL is `mailto:` followed by the email address.
442///
443fn md_email_address(
444    i: &'_ str,
445) -> nom::IResult<&'_ str, (Cow<'_, str>, Cow<'_, str>, Cow<'_, str>)> {
446    let j = i;
447    map(
448        all_consuming(nom::sequence::separated_pair(
449            // Parse scheme.
450            nom::bytes::complete::take_till1(|c: char| {
451                !(c.is_alphanumeric() || ".!#$%&'*+\\/=?^_`{|}~-".contains(c))
452            }),
453            tag("@"),
454            // Parse domain.
455            nom::bytes::complete::take_till1(|c: char| !(c.is_alphanumeric() || ".-".contains(c))),
456        )),
457        |(_, _)| {
458            (
459                Cow::Borrowed(j),
460                Cow::Owned(format!("mailto:{}", j.to_owned())),
461                Cow::Borrowed(""),
462            )
463        },
464    )
465    .parse(i)
466}
467
468#[cfg(test)]
469mod tests {
470    use super::*;
471    use nom::error::ErrorKind;
472
473    #[test]
474    fn test_md_text2dest() {
475        assert_eq!(
476            md_text2dest("[text](url)abc"),
477            Ok(("abc", (Cow::from("text"), Cow::from("url"), Cow::from(""))))
478        );
479        assert_eq!(
480            md_text2dest("[text[i]](url)abc"),
481            Ok((
482                "abc",
483                (Cow::from("text[i]"), Cow::from("url"), Cow::from(""))
484            ))
485        );
486        assert_eq!(
487            md_text2dest("[text[i]](ur(l))abc"),
488            Ok((
489                "abc",
490                (Cow::from("text[i]"), Cow::from("ur(l)"), Cow::from(""))
491            ))
492        );
493        assert_eq!(
494            md_text2dest("[text(url)"),
495            Err(nom::Err::Error(nom::error::Error::new("", ErrorKind::Tag)))
496        );
497        assert_eq!(
498            md_text2dest("[text](<url>)abc"),
499            Ok(("abc", (Cow::from("text"), Cow::from("url"), Cow::from(""))))
500        );
501        assert_eq!(
502            md_text2dest("[text](<url> \"link title\")abc"),
503            Ok((
504                "abc",
505                (Cow::from("text"), Cow::from("url"), Cow::from("link title"))
506            ))
507        );
508        assert_eq!(
509            md_text2dest("[text](url \"link title\")abc"),
510            Ok((
511                "abc",
512                (Cow::from("text"), Cow::from("url"), Cow::from("link title"))
513            ))
514        );
515        // [Example 483](https://spec.commonmark.org/0.30/#example-483)
516        assert_eq!(
517            md_text2dest("[](./target.md)abc"),
518            Ok((
519                "abc",
520                (Cow::from(""), Cow::from("./target.md"), Cow::from(""))
521            ))
522        );
523        // [Example 484](https://spec.commonmark.org/0.30/#example-484)
524        assert_eq!(
525            md_text2dest("[link]()abc"),
526            Ok(("abc", (Cow::from("link"), Cow::from(""), Cow::from(""))))
527        );
528        // [Example 485](https://spec.commonmark.org/0.30/#example-485)
529        assert_eq!(
530            md_text2dest("[link](<>)abc"),
531            Ok(("abc", (Cow::from("link"), Cow::from(""), Cow::from(""))))
532        );
533        // [Example 486](https://spec.commonmark.org/0.30/#example-486)
534        assert_eq!(
535            md_text2dest("[]()abc"),
536            Ok(("abc", (Cow::from(""), Cow::from(""), Cow::from(""))))
537        );
538        assert_eq!(
539            md_text2dest("[text]abc"),
540            Err(nom::Err::Error(nom::error::Error::new(
541                "abc",
542                ErrorKind::Tag
543            )))
544        );
545        // [Example 597](https://spec.commonmark.org/0.30/#example-597)
546        assert_eq!(
547            md_text2dest("<a+b+c:d>abc"),
548            Ok((
549                "abc",
550                (Cow::from("a+b+c:d"), Cow::from("a+b+c:d"), Cow::from(""))
551            ))
552        );
553        //[Example 603](https://spec.commonmark.org/0.30/#example-603)
554        assert_eq!(
555            md_text2dest("<foo@bar.example.com>abc"),
556            Ok((
557                "abc",
558                (
559                    Cow::from("foo@bar.example.com"),
560                    Cow::from("mailto:foo@bar.example.com"),
561                    Cow::from("")
562                )
563            ))
564        );
565        assert_eq!(
566            md_text2dest("<foo.example.com>abc"),
567            Err(nom::Err::Error(nom::error::Error::new(
568                "<foo.example.com>abc",
569                ErrorKind::Tag
570            )))
571        );
572        // [Example 20](https://spec.commonmark.org/0.30/#example-20)
573        assert_eq!(
574            md_text2dest(r#"<http://example.com?find=\*>abc"#),
575            Ok((
576                "abc",
577                (
578                    Cow::from(r#"http://example.com?find=\*"#),
579                    Cow::from(r#"http://example.com?find=\*"#),
580                    Cow::from("")
581                )
582            ))
583        );
584        // [Example 22](https://spec.commonmark.org/0.30/#example-22)
585        assert_eq!(
586            md_text2dest(r#"[foo](/bar\* "ti\*tle")abc"#),
587            Ok((
588                "abc",
589                (Cow::from("foo"), Cow::from("/bar*"), Cow::from("ti*tle"))
590            ))
591        );
592    }
593
594    #[test]
595    fn test_md_text2label() {
596        assert_eq!(
597            md_text2label("[link text][link label]abc"),
598            Ok(("abc", (Cow::from("link text"), Cow::from("link label"))))
599        );
600        assert_eq!(
601            md_text2label("[link text][]abc"),
602            Ok(("abc", (Cow::from("link text"), Cow::from("link text"))))
603        );
604        assert_eq!(
605            md_text2label("[link text]abc"),
606            Ok(("abc", (Cow::from("link text"), Cow::from("link text"))))
607        );
608        assert_eq!(
609            md_text2label("[]abc"),
610            Ok(("abc", (Cow::from(""), Cow::from(""))))
611        );
612        assert_eq!(
613            md_text2label(""),
614            Err(nom::Err::Error(nom::error::Error::new("", ErrorKind::Tag)))
615        );
616        // Check end of input position.
617        assert_eq!(
618            md_text2label("[text]"),
619            Ok(("", (Cow::from("text"), Cow::from("text"))))
620        );
621        assert_eq!(
622            md_text2label("[text][text]"),
623            Ok(("", (Cow::from("text"), Cow::from("text"))))
624        );
625        assert_eq!(
626            md_text2label("[text][label url"),
627            Err(nom::Err::Error(nom::error::Error::new(
628                "[label url",
629                ErrorKind::NoneOf
630            )))
631        );
632        assert_eq!(
633            md_text2label("[text](url)abc"),
634            Err(nom::Err::Error(nom::error::Error::new(
635                "(url)abc",
636                ErrorKind::NoneOf
637            )))
638        );
639    }
640
641    #[test]
642    fn test_md_label2dest() {
643        assert_eq!(
644            md_label2dest("[text]: url\nabc"),
645            Ok((
646                "\nabc",
647                (Cow::from("text"), Cow::from("url"), Cow::from(""))
648            ))
649        );
650        assert_eq!(
651            md_label2dest("[text]: url  \nabc"),
652            Ok((
653                "\nabc",
654                (Cow::from("text"), Cow::from("url"), Cow::from(""))
655            ))
656        );
657        assert_eq!(
658            md_label2dest("[text]: <url url> \nabc"),
659            Ok((
660                "\nabc",
661                (Cow::from("text"), Cow::from("url url"), Cow::from(""))
662            ))
663        );
664        assert_eq!(
665            md_label2dest("[text]: url \"title\"\nabc"),
666            Ok((
667                "\nabc",
668                (Cow::from("text"), Cow::from("url"), Cow::from("title"))
669            ))
670        );
671        assert_eq!(
672            md_label2dest("[text]: url\n\"title\"\nabc"),
673            Ok((
674                "\nabc",
675                (Cow::from("text"), Cow::from("url"), Cow::from("title"))
676            ))
677        );
678        assert_eq!(
679            md_label2dest("   [text]: url\n\"title\"\nabc"),
680            Ok((
681                "\nabc",
682                (Cow::from("text"), Cow::from("url"), Cow::from("title"))
683            ))
684        );
685        assert_eq!(
686            md_label2dest("abc[text]: url\n\"title\""),
687            Err(nom::Err::Error(nom::error::Error::new(
688                "abc[text]: url\n\"title\"",
689                ErrorKind::Tag
690            )))
691        );
692        assert_eq!(
693            md_label2dest("    [text]: url\n\"title\" abc"),
694            Err(nom::Err::Error(nom::error::Error::new(
695                " [text]: url\n\"title\" abc",
696                ErrorKind::Tag
697            )))
698        );
699        // Nested brackets.
700        assert_eq!(
701            md_label2dest("[text\\[i\\]]: ur(l)url\nabc"),
702            Ok((
703                "\nabc",
704                (Cow::from("text[i]"), Cow::from("ur(l)url"), Cow::from(""))
705            ))
706        );
707        // Nested but balanced not allowed for link labels.
708        assert_eq!(
709            md_label2dest("[text[i]]: ur(l)(url"),
710            Err(nom::Err::Error(nom::error::Error::new(
711                "[i]]: ur(l)(url",
712                ErrorKind::Tag
713            )))
714        );
715        // Whitespace can have one newline.
716        assert_eq!(
717            md_label2dest("[text]: \nurl"),
718            Ok(("", (Cow::from("text"), Cow::from("url"), Cow::from(""))))
719        );
720        // But only one newline is allowed.
721        assert_eq!(
722            md_label2dest("[text]: \n\nurl"),
723            Err(nom::Err::Error(nom::error::Error::new(
724                " \n\nurl",
725                ErrorKind::Verify
726            )))
727        );
728        assert_eq!(
729            md_label2dest("[text: url"),
730            Err(nom::Err::Error(nom::error::Error::new("", ErrorKind::Tag)))
731        );
732        assert_eq!(
733            md_label2dest("[text] url"),
734            Err(nom::Err::Error(nom::error::Error::new(
735                " url",
736                ErrorKind::Char
737            )))
738        );
739        assert_eq!(
740            md_label2dest("[text]: url \"link title\"\nabc"),
741            Ok((
742                "\nabc",
743                (Cow::from("text"), Cow::from("url"), Cow::from("link title"))
744            ))
745        );
746        assert_eq!(
747            md_label2dest("[text]: url \"link\ntitle\"\nabc"),
748            Ok((
749                "\nabc",
750                (
751                    Cow::from("text"),
752                    Cow::from("url"),
753                    Cow::from("link\ntitle")
754                )
755            ))
756        );
757        assert_eq!(
758            md_label2dest("[text]: url \"link\ntitle\"abc"),
759            Err(nom::Err::Error(nom::error::Error::new(
760                "abc",
761                ErrorKind::Char
762            )))
763        );
764        assert_eq!(
765            md_label2dest("[text]:\nurl \"link\ntitle\"\nabc"),
766            Ok((
767                "\nabc",
768                (
769                    Cow::from("text"),
770                    Cow::from("url"),
771                    Cow::from("link\ntitle")
772                )
773            ))
774        );
775        assert_eq!(
776            md_label2dest("[text]: url \"link\n\ntitle\"\nabc"),
777            Err(nom::Err::Error(nom::error::Error::new(
778                "\"link\n\ntitle\"\nabc",
779                ErrorKind::Char
780            )))
781        );
782        assert_eq!(
783            md_label2dest("[text]:\n\nurl \"link title\"\nabc"),
784            Err(nom::Err::Error(nom::error::Error::new(
785                "\n\nurl \"link title\"\nabc",
786                ErrorKind::Verify
787            )))
788        );
789        // [Example 23](https://spec.commonmark.org/0.30/#example-23)
790        assert_eq!(
791            md_label2dest(r#"[foo]: /bar\* "ti\*tle""#),
792            Ok((
793                "",
794                (Cow::from("foo"), Cow::from("/bar*"), Cow::from("ti*tle"))
795            ))
796        );
797    }
798
799    #[test]
800    fn test_md_link_text() {
801        assert_eq!(
802            md_link_text("[text](url)"),
803            Ok(("(url)", Cow::from("text")))
804        );
805        assert_eq!(
806            md_link_text("[text[i]](url)"),
807            Ok(("(url)", Cow::from("text[i]")))
808        );
809        assert_eq!(
810            md_link_text(r#"[text\[i\]](url)"#),
811            Ok(("(url)", Cow::from("text[i]")))
812        );
813        assert_eq!(
814            md_link_text("[text(url)"),
815            Err(nom::Err::Error(nom::error::Error::new("", ErrorKind::Tag)))
816        );
817        assert_eq!(
818            md_link_text(r#"[te\_xt](url)"#),
819            Ok(("(url)", Cow::from("te_xt")))
820        );
821    }
822
823    #[test]
824    fn test_md_link_label() {
825        assert_eq!(
826            md_link_label("[text]: url"),
827            Ok((": url", Cow::from("text")))
828        );
829        assert_eq!(
830            md_link_label(r#"[text\[i\]]: url"#),
831            Ok((": url", Cow::from("text[i]")))
832        );
833        assert_eq!(
834            md_link_label("[text: url"),
835            Err(nom::Err::Error(nom::error::Error::new("", ErrorKind::Tag)))
836        );
837        assert_eq!(
838            md_link_label("[t[ext: url"),
839            Err(nom::Err::Error(nom::error::Error::new(
840                "[ext: url",
841                ErrorKind::Tag
842            )))
843        );
844    }
845
846    #[test]
847    fn test_md_link_destination() {
848        assert_eq!(
849            md_link_destination("url  abc"),
850            Ok(("  abc", Cow::from("url")))
851        );
852        assert_eq!(md_link_destination("url"), Ok(("", Cow::from("url"))));
853        assert_eq!(
854            md_link_destination("url\nabc"),
855            Ok(("\nabc", Cow::from("url")))
856        );
857        assert_eq!(
858            md_link_destination("<url>abc"),
859            Ok(("abc", Cow::from("url")))
860        );
861        assert_eq!(
862            md_link_destination(r#"<u\<r\>l>abc"#),
863            Ok(("abc", Cow::from(r#"u<r>l"#)))
864        );
865        assert_eq!(
866            md_link_destination(r#"u\)r\(l abc"#),
867            Ok((" abc", Cow::from(r#"u)r(l"#)))
868        );
869        assert_eq!(
870            md_link_destination(r#"u(r)l abc"#),
871            Ok((" abc", Cow::from(r#"u(r)l"#)))
872        );
873        assert_eq!(
874            md_link_destination("u(r)l\nabc"),
875            Ok(("\nabc", Cow::from(r#"u(r)l"#)))
876        );
877    }
878
879    #[test]
880    fn test_md_parse_link_destination() {
881        assert_eq!(md_parse_link_destination("<url>abc"), Ok(("abc", "url")));
882        assert_eq!(
883            md_parse_link_destination(r#"<u\<r\>l>abc"#),
884            Ok(("abc", r#"u\<r\>l"#))
885        );
886        assert_eq!(md_parse_link_destination("<url> abc"), Ok((" abc", "url")));
887        assert_eq!(
888            md_parse_link_destination("<url>\nabc"),
889            Ok(("\nabc", "url"))
890        );
891        assert_eq!(
892            md_parse_link_destination("<url 2>abc"),
893            Ok(("abc", "url 2"))
894        );
895        assert_eq!(md_parse_link_destination("url abc"), Ok((" abc", "url")));
896        assert_eq!(
897            md_parse_link_destination("<url(1)> abc"),
898            Ok((" abc", "url(1)"))
899        );
900        assert_eq!(
901            md_parse_link_destination(r#"<[1a]\[1b\](2a)\(2b\)\<3b\>{4a}\{4b\}> abc"#),
902            Ok((" abc", r#"[1a]\[1b\](2a)\(2b\)\<3b\>{4a}\{4b\}"#))
903        );
904        assert_eq!(
905            md_parse_link_destination("ur()l abc"),
906            Ok((" abc", "ur()l"))
907        );
908        assert_eq!(
909            md_parse_link_destination("ur()l\nabc"),
910            Ok(("\nabc", "ur()l"))
911        );
912        assert_eq!(md_parse_link_destination("<>abc"), Ok(("abc", "")));
913        assert_eq!(md_parse_link_destination("<>\nabc"), Ok(("\nabc", "")));
914        assert_eq!(md_parse_link_destination("url"), Ok(("", "url")));
915        assert_eq!(md_parse_link_destination(""), Ok(("", "")));
916        assert_eq!(md_parse_link_destination("\nabc"), Ok(("\nabc", "")));
917    }
918
919    #[test]
920    fn test_md_escaped_str_transform() {
921        assert_eq!(md_escaped_str_transform(""), Ok(("", Cow::from(""))));
922        // Different than the link destination version.
923        assert_eq!(md_escaped_str_transform("   "), Ok(("", Cow::from("   "))));
924        assert_eq!(
925            md_escaped_str_transform(r#"abc`:<>abc"#),
926            Ok(("", Cow::from(r#"abc`:<>abc"#)))
927        );
928        assert_eq!(
929            md_escaped_str_transform(r#"\<\>\\"#),
930            Ok(("", Cow::from(r#"<>\"#)))
931        );
932        assert_eq!(
933            md_escaped_str_transform(r#"\(\)\\"#),
934            Ok(("", Cow::from(r#"()\"#)))
935        );
936        // [Example 12](https://spec.commonmark.org/0.30/#example-12)
937        assert_eq!(
938            md_escaped_str_transform(
939                r#"\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\`\{\|\}\~"#
940            ),
941            Ok(("", Cow::from(r###"!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"###)))
942        );
943    }
944
945    #[test]
946    fn test_md_link_title() {
947        // Similar to the
948        // [Example 504](https://spec.commonmark.org/0.30/#example-504)
949        assert_eq!(
950            md_link_title(" (title)abc"),
951            Ok(("abc", Cow::from("title")))
952        );
953        assert_eq!(
954            md_link_title(" (ti(t)le)abc"),
955            Ok(("abc", Cow::from("ti(t)le")))
956        );
957        assert_eq!(
958            md_link_title(r#" (ti\(t\)le)abc"#),
959            Ok(("abc", Cow::from("ti(t)le")))
960        );
961        assert_eq!(
962            md_link_title(r#" "1\\23\"4\'56"abc"#),
963            Ok(("abc", Cow::from(r#"1\23"4'56"#)))
964        );
965        assert_eq!(
966            md_link_title(" \"tu\nvwxy\"abc"),
967            Ok(("abc", Cow::from("tu\nvwxy")))
968        );
969        assert_eq!(
970            md_link_title(" 'tu\nv\\\'wxy'abc"),
971            Ok(("abc", Cow::from("tu\nv\'wxy")))
972        );
973        assert_eq!(
974            md_link_title(" (ti\n\ntle)abc"),
975            Err(nom::Err::Error(nom::error::Error::new(
976                "(ti\n\ntle)abc",
977                ErrorKind::Verify
978            )))
979        );
980    }
981
982    #[test]
983    fn test_md_parse_link_title() {
984        assert_eq!(md_parse_link_title(" (title)abc"), Ok(("abc", "title")));
985        assert_eq!(md_parse_link_title(" (ti(t)le)abc"), Ok(("abc", "ti(t)le")));
986        assert_eq!(
987            md_parse_link_title(r#" "1\\23\"4\'56"abc"#),
988            Ok(("abc", r#"1\\23\"4\'56"#))
989        );
990        assert_eq!(
991            md_parse_link_title(" \"tu\nvwxy\"abc"),
992            Ok(("abc", "tu\nvwxy"))
993        );
994        assert_eq!(
995            md_parse_link_title(" 'tu\nv\\\'wxy'abc"),
996            Ok(("abc", "tu\nv\\\'wxy"))
997        );
998        assert_eq!(
999            md_parse_link_title(" (ti\n\ntle)abc"),
1000            Err(nom::Err::Error(nom::error::Error::new(
1001                "(ti\n\ntle)abc",
1002                ErrorKind::Verify
1003            )))
1004        );
1005    }
1006    #[test]
1007    fn test_md_absolute_uri() {
1008        assert_eq!(
1009            md_absolute_uri("http://domain.com").unwrap().1.0,
1010            Cow::Borrowed("http://domain.com")
1011        );
1012        assert_eq!(
1013            md_absolute_uri("http://domain.com").unwrap().1.1,
1014            Cow::Borrowed("http://domain.com")
1015        );
1016        assert_eq!(
1017            md_absolute_uri("scheme:domain").unwrap().1.1,
1018            Cow::Borrowed("scheme:domain")
1019        );
1020        assert_eq!(
1021            md_absolute_uri("scheme:domain abc"),
1022            Err(nom::Err::Error(nom::error::Error::new(
1023                " abc",
1024                ErrorKind::Eof
1025            )))
1026        );
1027        assert_eq!(
1028            md_absolute_uri("h:domain"),
1029            Err(nom::Err::Error(nom::error::Error::new(
1030                "h:domain",
1031                ErrorKind::Verify
1032            )))
1033        );
1034        assert_eq!(
1035            md_absolute_uri("sche&me:domain"),
1036            Err(nom::Err::Error(nom::error::Error::new(
1037                "&me:domain",
1038                ErrorKind::Tag
1039            )))
1040        );
1041        assert_eq!(
1042            md_absolute_uri("scheme+much+too.long......................:uri"),
1043            Err(nom::Err::Error(nom::error::Error::new(
1044                "scheme+much+too.long......................:uri",
1045                ErrorKind::Verify
1046            )))
1047        );
1048        assert_eq!(
1049            md_absolute_uri("httpÜ:domain abc"),
1050            Err(nom::Err::Error(nom::error::Error::new(
1051                "Ü:domain abc",
1052                ErrorKind::Tag
1053            )))
1054        );
1055        assert_eq!(
1056            md_absolute_uri("no colon"),
1057            Err(nom::Err::Error(nom::error::Error::new(
1058                " colon",
1059                ErrorKind::Tag
1060            )))
1061        );
1062        assert_eq!(
1063            md_absolute_uri("scheme:domai>n"),
1064            Err(nom::Err::Error(nom::error::Error::new(
1065                ">n",
1066                ErrorKind::Eof
1067            )))
1068        );
1069
1070        let res = md_absolute_uri("scheme:domain").unwrap();
1071        assert!(matches!(res.1.0, Cow::Borrowed(..)));
1072        assert_eq!(res.1.0, Cow::from("scheme:domain"));
1073
1074        let res = md_absolute_uri("scheme:domai%25n").unwrap();
1075        assert!(matches!(res.1.0, Cow::Owned(..)));
1076        assert_eq!(res.1.0, Cow::from("scheme:domai%n"));
1077    }
1078
1079    #[test]
1080    fn test_md_email_address() {
1081        let res = md_email_address("local@domain").unwrap();
1082        assert!(matches!(res.1.0, Cow::Borrowed(..)));
1083        assert!(matches!(res.1.1, Cow::Owned(..)));
1084        assert_eq!(res.1.0, Cow::from("local@domain"));
1085        assert_eq!(res.1.1, Cow::from("mailto:local@domain"));
1086
1087        let res = md_email_address("localÜ@domainÜ").unwrap();
1088        assert!(matches!(res.1.0, Cow::Borrowed(..)));
1089        assert!(matches!(res.1.1, Cow::Owned(..)));
1090        assert_eq!(res.1.0, Cow::from("localÜ@domainÜ"));
1091        assert_eq!(res.1.1, Cow::from("mailto:localÜ@domainÜ"));
1092
1093        let res = md_email_address("lo.cal@domain").unwrap();
1094        assert!(matches!(res.1.0, Cow::Borrowed(..)));
1095        assert!(matches!(res.1.1, Cow::Owned(..)));
1096        assert_eq!(res.1.0, Cow::from("lo.cal@domain"));
1097        assert_eq!(res.1.1, Cow::from("mailto:lo.cal@domain"));
1098
1099        assert_eq!(
1100            md_email_address("lo_cal@do_main"),
1101            Err(nom::Err::Error(nom::error::Error::new(
1102                "_main",
1103                ErrorKind::Eof
1104            )))
1105        );
1106    }
1107
1108    /*
1109    #[test]
1110    fn test_md_escaped() {
1111        use nom::IResult;
1112        use nom::bytes::complete::escaped;
1113        use nom::character::complete::one_of;
1114
1115        fn esc(s: &str) -> IResult<&str, &str> {
1116          escaped(nom::character::complete::none_of(r#"\<>"#), '\\', one_of(ESCAPABLE))(s)
1117        }
1118
1119        assert_eq!(esc("123\\>123\\<4>abc"), Ok((">abc", "123\\>123\\<4")));
1120    }
1121    */
1122}