parse_hyperlinks/parser/
markdown.rs

1//! This module implements parsers for Markdown hyperlinks.
2#![allow(dead_code)]
3#![allow(clippy::type_complexity)]
4
5use crate::parser::Link;
6use crate::parser::parse::LABEL_LEN_MAX;
7use crate::parser::percent_decode;
8use crate::take_until_unbalanced;
9use nom::branch::alt;
10use nom::bytes::complete::tag;
11use nom::character::complete::multispace1;
12use nom::{Parser, combinator::*};
13use std::borrow::Cow;
14
/// ASCII punctuation characters that may be backslash-escaped inside a
/// _link text_, _link label_, _link destination_ or _link title_.
const ESCAPABLE: &str = r##"!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"##;
18
19/// Wrapper around `md_text2dest()` that packs the result in
20/// `Link::Text2Dest`.
21pub fn md_text2dest_link(i: &str) -> nom::IResult<&str, Link> {
22    let (i, (te, de, ti)) = md_text2dest(i)?;
23    Ok((i, Link::Text2Dest(te, de, ti)))
24}
25
26/// Parses a Markdown _inline link_.
27///
28/// This parser expects to start at the beginning of the link `[` to succeed.
29/// ```
30/// use parse_hyperlinks::parser::Link;
31/// use parse_hyperlinks::parser::markdown::md_text2dest;
32/// use std::borrow::Cow;
33///
34/// assert_eq!(
35///   md_text2dest(r#"[text](<dest> "title")abc"#),
36///   Ok(("abc", (Cow::from("text"), Cow::from("dest"), Cow::from("title"))))
37/// );
38///
39/// assert_eq!(
40///   md_text2dest(r#"<scheme:dest>abc"#),
41///   Ok(("abc", (Cow::from("scheme:dest"), Cow::from("scheme:dest"), Cow::from(""))))
42/// );
43/// assert_eq!(
44///   md_text2dest(r#"<foo@dest>abc"#),
45///   Ok(("abc", (Cow::from("foo@dest"), Cow::from("mailto:foo@dest"), Cow::from(""))))
46/// );
47/// ```
48pub fn md_text2dest(i: &str) -> nom::IResult<&str, (Cow<str>, Cow<str>, Cow<str>)> {
49    alt((
50        // Parse autolink.
51        nom::sequence::delimited(
52            tag("<"),
53            map_parser(
54                nom::bytes::complete::take_till1(|c: char| {
55                    c.is_ascii_whitespace() || c == '>' || c == '<'
56                }),
57                alt((md_absolute_uri, md_email_address)),
58            ),
59            tag(">"),
60        ),
61        // Parse inline link.
62        map(
63            (md_link_text, md_link_destination_enclosed),
64            |(a, (b, c))| (a, b, c),
65        ),
66    ))
67    .parse(i)
68}
69
70/// Wrapper around `md_label2dest()` that packs the result in
71/// `Link::Label2Dest`.
72pub fn md_label2dest_link(i: &str) -> nom::IResult<&str, Link> {
73    let (i, (l, d, t)) = md_label2dest(i)?;
74    Ok((i, Link::Label2Dest(l, d, t)))
75}
76
77/// Matches a Markdown _link reference definition_.
78///
79/// The caller must guarantee, that the parser starts at first character of the
80/// input or at the first character of a line. The parser consumes all bytes
81/// until the end of the line.
82/// ```
83/// use parse_hyperlinks::parser::Link;
84/// use parse_hyperlinks::parser::markdown::md_label2dest;
85/// use std::borrow::Cow;
86///
87/// assert_eq!(
88///   md_label2dest("   [label]: <destination> 'title'\nabc"),
89///   Ok(("\nabc", (Cow::from("label"), Cow::from("destination"), Cow::from("title"))))
90/// );
91/// ```
92///
93/// [CommonMark
94/// Spec](https://spec.commonmark.org/0.30/#link-reference-definition)\ A [link
95/// reference
96/// definition](https://spec.commonmark.org/0.30/#link-reference-definition)
97/// consists of a [link label](https://spec.commonmark.org/0.30/#link-label),
98/// optionally preceded by up to three spaces of indentation, followed by a
99/// colon (`:`), optional spaces or tabs (including up to one [line
100/// ending](https://spec.commonmark.org/0.30/#line-ending)), a [link
101/// destination](https://spec.commonmark.org/0.30/#link-destination), optional
102/// spaces or tabs (including up to one [line
103/// ending](https://spec.commonmark.org/0.30/#line-ending)), and an optional
104/// [link title](https://spec.commonmark.org/0.30/#link-title), which if it is
105/// present must be separated from the [link
106/// destination](https://spec.commonmark.org/0.30/#link-destination) by spaces
107/// or tabs. No further character may occur.
108///
109/// A [link reference
110/// definition](https://spec.commonmark.org/0.30/#link-reference-definition)
111/// does not correspond to a structural element of a document. Instead, it
112/// defines a label which can be used in [reference
113/// links](https://spec.commonmark.org/0.30/#reference-link) and reference-style
114/// [images](https://spec.commonmark.org/0.30/#images) elsewhere in the
115/// document. [Link reference
116/// definitions](https://spec.commonmark.org/0.30/#link-reference-definition)
117/// can come either before or after the links that use them.
118pub fn md_label2dest(i: &str) -> nom::IResult<&str, (Cow<str>, Cow<str>, Cow<str>)> {
119    // Consume up to three spaces.
120    let (i, _) = nom::bytes::complete::take_while_m_n(0, 3, |c| c == ' ')(i)?;
121    // Take label.
122    let (i, link_text) = md_link_label(i)?;
123    let (i, _) = nom::character::complete::char(':')(i)?;
124    // Take spaces.
125    let (i, _) = verify(nom::character::complete::multispace1, |s: &str| {
126        !s.contains("\n\n")
127    })
128    .parse(i)?;
129    // Take destination.
130    let (i, link_destination) = md_link_destination(i)?;
131    // Try, but do not fail.
132    let (i, link_title) = alt((
133        // Take link title.
134        md_link_title,
135        nom::combinator::success(Cow::from("")),
136    ))
137    .parse(i)?;
138
139    // Now consume as much whitespace as possible.
140    let (i, _) = nom::character::complete::space0(i)?;
141
142    // Check if there is newline coming. Do not consume.
143    if !i.is_empty() {
144        let _ = nom::character::complete::newline(i)?;
145    }
146
147    Ok((i, (link_text, link_destination, link_title)))
148}
149
150/// Wrapper around `md_text2label()` that packs the result in
151/// `Link::Text2Label`.
152pub fn md_text2label_link(i: &str) -> nom::IResult<&str, Link> {
153    let (i, (t, l)) = md_text2label(i)?;
154    Ok((i, Link::Text2Label(t, l)))
155}
156
157/// Parse a Markdown _reference link_.
158///
159/// There are three kinds of reference links: full, collapsed, and shortcut.
160/// 1. A full reference link consists of a link text immediately followed by a
161///    link label that matches a link reference definition elsewhere in the
162///    document.
163/// 2. A collapsed reference link consists of a link label that matches a link
164///    reference definition elsewhere in the document, followed by the string [].
165///    The contents of the first link label are parsed as inlines, which are used as
166///    the link’s text. The link’s URI and title are provided by the matching
167///    reference link definition. Thus, `[foo][]` is equivalent to `[foo][foo]`.
168/// 3. A shortcut reference link consists of a link label that matches a link
169///    reference definition elsewhere in the document and is not followed by [] or a
170///    link label. The contents of the first link label are parsed as inlines, which
171///    are used as the link’s text. The link’s URI and title are provided by the
172///    matching link reference definition. Thus, `[foo]` is equivalent to `[foo][]`.
173///
174/// This parser expects to start at the beginning of the link `[` to succeed.
175/// It should always run at last position after all other parsers.
176/// ```rust
177/// use parse_hyperlinks::parser::Link;
178/// use parse_hyperlinks::parser::markdown::md_text2label;
179/// use std::borrow::Cow;
180///
181/// assert_eq!(
182///   md_text2label("[link text][link label]abc"),
183///   Ok(("abc", (Cow::from("link text"), Cow::from("link label"))))
184/// );
185/// assert_eq!(
186///   md_text2label("[link text][]abc"),
187///   Ok(("abc", (Cow::from("link text"), Cow::from("link text"))))
188/// );
189/// assert_eq!(
190///   md_text2label("[link text]abc"),
191///   Ok(("abc", (Cow::from("link text"), Cow::from("link text"))))
192/// );
193/// ```
194pub fn md_text2label(i: &str) -> nom::IResult<&str, (Cow<str>, Cow<str>)> {
195    let (i, (link_text, link_label)) = alt((
196        nom::sequence::pair(md_link_text, md_link_label),
197        nom::combinator::map(nom::sequence::terminated(md_link_text, tag("[]")), |s| {
198            (s.clone(), s)
199        }),
200        nom::combinator::map(md_link_text, |s| (s.clone(), s)),
201    ))
202    .parse(i)?;
203
204    // Check that there is no `[` or `(` following. Do not consume.
205    if !i.is_empty() {
206        let _ = nom::character::complete::none_of("[(")(i)?;
207    }
208
209    Ok((i, (link_text, link_label)))
210}
211
212/// Parses _link text_.
213/// Brackets are allowed in the
214/// [link text](https://spec.commonmark.org/0.29/#link-text) only if (a) they are
215/// backslash-escaped or (b) they appear as a matched pair of brackets, with
216/// an open bracket `[`, a sequence of zero or more inlines, and a close
217/// bracket `]`.
218/// [CommonMark Spec](https://spec.commonmark.org/0.29/#link-text)
219pub(crate) fn md_link_text(i: &str) -> nom::IResult<&str, Cow<str>> {
220    nom::combinator::map_parser(
221        nom::sequence::delimited(tag("["), take_until_unbalanced('[', ']'), tag("]")),
222        md_escaped_str_transform,
223    )
224    .parse(i)
225}
226
227/// Parses a _link label_.
228/// A link label begins with a left bracket ([) and ends with the first right
229/// bracket (]) that is not backslash-escaped. Between these brackets there must
230/// be at least one non-whitespace character. Unescaped square bracket characters
231/// are not allowed inside the opening and closing square brackets of link
232/// labels. A link label can have at most 999 characters inside the square
233/// brackets (TODO).
234/// [CommonMark Spec](https://spec.commonmark.org/0.29/#link-label)
235fn md_link_label(i: &str) -> nom::IResult<&str, Cow<str>> {
236    nom::combinator::map_parser(
237        nom::combinator::verify(
238            nom::sequence::delimited(
239                tag("["),
240                nom::bytes::complete::escaped(
241                    nom::character::complete::none_of("\\[]"),
242                    '\\',
243                    nom::character::complete::one_of(ESCAPABLE),
244                ),
245                tag("]"),
246            ),
247            |l: &str| l.len() <= LABEL_LEN_MAX,
248        ),
249        md_escaped_str_transform,
250    )
251    .parse(i)
252}
253
254/// This is a wrapper around `md_parse_link_destination()`. It takes its result
255/// and removes the `\` before the escaped characters `ESCAPABLE`.
256pub(crate) fn md_link_destination(i: &str) -> nom::IResult<&str, Cow<str>> {
257    nom::combinator::map_parser(md_parse_link_destination, md_escaped_str_transform).parse(i)
258}
259
260/// A [link destination](https://spec.commonmark.org/0.30/#link-destination)
261/// consists of either:
262///
263/// * a sequence of zero or more characters between an opening `<` and a
264///   closing `>` that contains no line endings or unescaped `<` or `>`
265///   characters, or
266/// * a nonempty sequence of characters that does not start with `<`, does not
267///   include [ASCII control
268///   characters](https://spec.commonmark.org/0.30/#ascii-control-character) or
269///   [space](https://spec.commonmark.org/0.30/#space) character, and includes
270///   parentheses only if (a) they are backslash-escaped or (b) they are part
271///   of a balanced pair of unescaped parentheses. (Implementations may impose
272///   limits on parentheses nesting to avoid performance issues, but at least
273///   three levels of nesting should be supported.)
274fn md_parse_link_destination(i: &str) -> nom::IResult<&str, &str> {
275    alt((
276        nom::sequence::delimited(
277            tag("<"),
278            nom::bytes::complete::escaped(
279                nom::character::complete::none_of(r#"\<>"#),
280                '\\',
281                nom::character::complete::one_of(ESCAPABLE),
282            ),
283            tag(">"),
284        ),
285        map(nom::bytes::complete::tag("<>"), |_| ""),
286        alt((
287            nom::bytes::complete::is_not(" \t\r\n"),
288            nom::combinator::success(""),
289        )),
290    ))
291    .parse(i)
292}
293
294/// Matches `md_link_destination` in parenthesis.
295pub(crate) fn md_link_destination_enclosed(i: &str) -> nom::IResult<&str, (Cow<str>, Cow<str>)> {
296    map_parser(
297        nom::sequence::delimited(tag("("), take_until_unbalanced('(', ')'), tag(")")),
298        (
299            md_link_destination,
300            alt((
301                // Take link title.
302                md_link_title,
303                nom::combinator::success(Cow::from("")),
304            )),
305        ),
306    )
307    .parse(i)
308}
309
310/// This is a wrapper around `md_parse_link_title()`. It takes its result
311/// and removes the `\` before the escaped characters `ESCAPABLE`.
312fn md_link_title(i: &str) -> nom::IResult<&str, Cow<str>> {
313    nom::combinator::map_parser(md_parse_link_title, md_escaped_str_transform).parse(i)
314}
315
316/// A link title is always preceded one or more whitespace inluding
317/// one newline.
318/// [CommonMark Spec](https://spec.commonmark.org/0.29/#link-title)
319/// A [link title](https://spec.commonmark.org/0.29/#link-title) consists of either
320///
321///  - a sequence of zero or more characters between straight double-quote
322///    characters (`"`), including a `"` character only if it is
323///    backslash-escaped, or
324///  - a sequence of zero or more characters between straight single-quote
325///    characters (`'`), including a `'` character only if it is
326///    backslash-escaped, or
327///  - a sequence of zero or more characters between matching parentheses
328///    (`(...)`), including a `(` or `)` character only if it is
329///    backslash-escaped.
330///
331///  Although [link titles](https://spec.commonmark.org/0.29/#link-title) may
332///  span multiple lines, they may not contain a [blank
333///  line](https://spec.commonmark.org/0.29/#blank-line).
334fn md_parse_link_title(i: &str) -> nom::IResult<&str, &str> {
335    nom::sequence::preceded(
336        verify(multispace1, |s: &str| !s.contains("\n\n")),
337        verify(
338            alt((
339                nom::sequence::delimited(tag("("), take_until_unbalanced('(', ')'), tag(")")),
340                nom::sequence::delimited(
341                    tag("'"),
342                    nom::bytes::complete::escaped(
343                        nom::character::complete::none_of(r#"\'"#),
344                        '\\',
345                        nom::character::complete::one_of(ESCAPABLE),
346                    ),
347                    tag("'"),
348                ),
349                nom::sequence::delimited(
350                    tag("\""),
351                    nom::bytes::complete::escaped(
352                        nom::character::complete::none_of(r#"\""#),
353                        '\\',
354                        nom::character::complete::one_of(ESCAPABLE),
355                    ),
356                    tag("\""),
357                ),
358            )),
359            |s: &str| !s.contains("\n\n"),
360        ),
361    )
362    .parse(i)
363}
364
365/// Remove the `\` before the escaped characters `ESCAPABLE`.
366fn md_escaped_str_transform(i: &str) -> nom::IResult<&str, Cow<str>> {
367    nom::combinator::map(
368        nom::bytes::complete::escaped_transform(
369            nom::bytes::complete::is_not("\\"),
370            '\\',
371            nom::character::complete::one_of(ESCAPABLE),
372        ),
373        |s| if s == i { Cow::from(i) } else { Cow::from(s) },
374    )
375    .parse(i)
376}
377
378/// Parses an [absolute URI](https://spec.commonmark.org/0.30/#absolute-uri).
379/// This parser consumes all input to succeed.
380/// An absolute URI, for these purposes, consists of a
381/// [scheme](https://spec.commonmark.org/0.30/#scheme) followed by a
382/// colon (`:`) followed by zero or more characters other [ASCII control
383/// characters](https://spec.commonmark.org/0.30/#ascii-control-character),
384/// [space](https://spec.commonmark.org/0.30/#space), `<`, and `>`. If the
385/// URI includes these characters, they must be percent-encoded (e.g. `%20`
386/// for a space).
387///
388/// For purposes of this spec, a
389/// [scheme](https://spec.commonmark.org/0.30/#scheme) is any sequence of
390/// 2–32 characters beginning with an ASCII letter and followed by any
391/// combination of ASCII letters, digits, or the symbols plus (”+”),
392/// period (”.”), or hyphen (”-”).
393///
394/// [CommonMark Spec](https://spec.commonmark.org/0.30/#autolinks)
395fn md_absolute_uri(i: &str) -> nom::IResult<&str, (Cow<str>, Cow<str>, Cow<str>)> {
396    let j = i;
397    map(
398        all_consuming(nom::sequence::separated_pair(
399            // Parse scheme.
400            verify(
401                nom::bytes::complete::take_till1(|c: char| {
402                    !(c.is_ascii_alphanumeric() || "+.-".contains(c))
403                }),
404                |s: &str| s.len() >= 2 && s.len() <= 32,
405            ),
406            tag(":"),
407            // Parse domain.
408            map_parser(
409                nom::bytes::complete::take_till1(|c: char| {
410                    c.is_ascii_control() || c.is_ascii_whitespace() || "<>".contains(c)
411                }),
412                percent_decode,
413            ),
414        )),
415        |(scheme, domain)| {
416            let uri = if matches!(domain, Cow::Borrowed(..)) {
417                Cow::Borrowed(j)
418            } else {
419                Cow::Owned(format!("{scheme}:{domain}"))
420            };
421            (uri.clone(), uri, Cow::from(""))
422        },
423    )
424    .parse(i)
425}
426
427/// Parses an Email address. This parser consumes all input to succeed.
428/// As it only checks and forwards, the result type is `Cow::Borrowed`.
429/// The check is not as strict but inspired by
430/// [HTML5 spec](https://html.spec.whatwg.org/multipage/forms.html#e-mail-state-(type=email)):
431/// and [CommonMark Spec](https://spec.commonmark.org/0.30/#email-autolink)
432/// The link’s label is the email address, and the
433/// URL is `mailto:` followed by the email address.
434///
435fn md_email_address(i: &str) -> nom::IResult<&str, (Cow<str>, Cow<str>, Cow<str>)> {
436    let j = i;
437    map(
438        all_consuming(nom::sequence::separated_pair(
439            // Parse scheme.
440            nom::bytes::complete::take_till1(|c: char| {
441                !(c.is_alphanumeric() || ".!#$%&'*+\\/=?^_`{|}~-".contains(c))
442            }),
443            tag("@"),
444            // Parse domain.
445            nom::bytes::complete::take_till1(|c: char| !(c.is_alphanumeric() || ".-".contains(c))),
446        )),
447        |(_, _)| {
448            (
449                Cow::Borrowed(j),
450                Cow::Owned(format!("mailto:{}", j.to_owned())),
451                Cow::Borrowed(""),
452            )
453        },
454    )
455    .parse(i)
456}
457
458#[cfg(test)]
459mod tests {
460    use super::*;
461    use nom::error::ErrorKind;
462
463    #[test]
464    fn test_md_text2dest() {
465        assert_eq!(
466            md_text2dest("[text](url)abc"),
467            Ok(("abc", (Cow::from("text"), Cow::from("url"), Cow::from(""))))
468        );
469        assert_eq!(
470            md_text2dest("[text[i]](url)abc"),
471            Ok((
472                "abc",
473                (Cow::from("text[i]"), Cow::from("url"), Cow::from(""))
474            ))
475        );
476        assert_eq!(
477            md_text2dest("[text[i]](ur(l))abc"),
478            Ok((
479                "abc",
480                (Cow::from("text[i]"), Cow::from("ur(l)"), Cow::from(""))
481            ))
482        );
483        assert_eq!(
484            md_text2dest("[text(url)"),
485            Err(nom::Err::Error(nom::error::Error::new("", ErrorKind::Tag)))
486        );
487        assert_eq!(
488            md_text2dest("[text](<url>)abc"),
489            Ok(("abc", (Cow::from("text"), Cow::from("url"), Cow::from(""))))
490        );
491        assert_eq!(
492            md_text2dest("[text](<url> \"link title\")abc"),
493            Ok((
494                "abc",
495                (Cow::from("text"), Cow::from("url"), Cow::from("link title"))
496            ))
497        );
498        assert_eq!(
499            md_text2dest("[text](url \"link title\")abc"),
500            Ok((
501                "abc",
502                (Cow::from("text"), Cow::from("url"), Cow::from("link title"))
503            ))
504        );
505        // [Example 483](https://spec.commonmark.org/0.30/#example-483)
506        assert_eq!(
507            md_text2dest("[](./target.md)abc"),
508            Ok((
509                "abc",
510                (Cow::from(""), Cow::from("./target.md"), Cow::from(""))
511            ))
512        );
513        // [Example 484](https://spec.commonmark.org/0.30/#example-484)
514        assert_eq!(
515            md_text2dest("[link]()abc"),
516            Ok(("abc", (Cow::from("link"), Cow::from(""), Cow::from(""))))
517        );
518        // [Example 485](https://spec.commonmark.org/0.30/#example-485)
519        assert_eq!(
520            md_text2dest("[link](<>)abc"),
521            Ok(("abc", (Cow::from("link"), Cow::from(""), Cow::from(""))))
522        );
523        // [Example 486](https://spec.commonmark.org/0.30/#example-486)
524        assert_eq!(
525            md_text2dest("[]()abc"),
526            Ok(("abc", (Cow::from(""), Cow::from(""), Cow::from(""))))
527        );
528        assert_eq!(
529            md_text2dest("[text]abc"),
530            Err(nom::Err::Error(nom::error::Error::new(
531                "abc",
532                ErrorKind::Tag
533            )))
534        );
535        // [Example 597](https://spec.commonmark.org/0.30/#example-597)
536        assert_eq!(
537            md_text2dest("<a+b+c:d>abc"),
538            Ok((
539                "abc",
540                (Cow::from("a+b+c:d"), Cow::from("a+b+c:d"), Cow::from(""))
541            ))
542        );
543        //[Example 603](https://spec.commonmark.org/0.30/#example-603)
544        assert_eq!(
545            md_text2dest("<foo@bar.example.com>abc"),
546            Ok((
547                "abc",
548                (
549                    Cow::from("foo@bar.example.com"),
550                    Cow::from("mailto:foo@bar.example.com"),
551                    Cow::from("")
552                )
553            ))
554        );
555        assert_eq!(
556            md_text2dest("<foo.example.com>abc"),
557            Err(nom::Err::Error(nom::error::Error::new(
558                "<foo.example.com>abc",
559                ErrorKind::Tag
560            )))
561        );
562        // [Example 20](https://spec.commonmark.org/0.30/#example-20)
563        assert_eq!(
564            md_text2dest(r#"<http://example.com?find=\*>abc"#),
565            Ok((
566                "abc",
567                (
568                    Cow::from(r#"http://example.com?find=\*"#),
569                    Cow::from(r#"http://example.com?find=\*"#),
570                    Cow::from("")
571                )
572            ))
573        );
574        // [Example 22](https://spec.commonmark.org/0.30/#example-22)
575        assert_eq!(
576            md_text2dest(r#"[foo](/bar\* "ti\*tle")abc"#),
577            Ok((
578                "abc",
579                (Cow::from("foo"), Cow::from("/bar*"), Cow::from("ti*tle"))
580            ))
581        );
582    }
583
584    #[test]
585    fn test_md_text2label() {
586        assert_eq!(
587            md_text2label("[link text][link label]abc"),
588            Ok(("abc", (Cow::from("link text"), Cow::from("link label"))))
589        );
590        assert_eq!(
591            md_text2label("[link text][]abc"),
592            Ok(("abc", (Cow::from("link text"), Cow::from("link text"))))
593        );
594        assert_eq!(
595            md_text2label("[link text]abc"),
596            Ok(("abc", (Cow::from("link text"), Cow::from("link text"))))
597        );
598        assert_eq!(
599            md_text2label("[]abc"),
600            Ok(("abc", (Cow::from(""), Cow::from(""))))
601        );
602        assert_eq!(
603            md_text2label(""),
604            Err(nom::Err::Error(nom::error::Error::new("", ErrorKind::Tag)))
605        );
606        // Check end of input position.
607        assert_eq!(
608            md_text2label("[text]"),
609            Ok(("", (Cow::from("text"), Cow::from("text"))))
610        );
611        assert_eq!(
612            md_text2label("[text][text]"),
613            Ok(("", (Cow::from("text"), Cow::from("text"))))
614        );
615        assert_eq!(
616            md_text2label("[text][label url"),
617            Err(nom::Err::Error(nom::error::Error::new(
618                "[label url",
619                ErrorKind::NoneOf
620            )))
621        );
622        assert_eq!(
623            md_text2label("[text](url)abc"),
624            Err(nom::Err::Error(nom::error::Error::new(
625                "(url)abc",
626                ErrorKind::NoneOf
627            )))
628        );
629    }
630
631    #[test]
632    fn test_md_label2dest() {
633        assert_eq!(
634            md_label2dest("[text]: url\nabc"),
635            Ok((
636                "\nabc",
637                (Cow::from("text"), Cow::from("url"), Cow::from(""))
638            ))
639        );
640        assert_eq!(
641            md_label2dest("[text]: url  \nabc"),
642            Ok((
643                "\nabc",
644                (Cow::from("text"), Cow::from("url"), Cow::from(""))
645            ))
646        );
647        assert_eq!(
648            md_label2dest("[text]: <url url> \nabc"),
649            Ok((
650                "\nabc",
651                (Cow::from("text"), Cow::from("url url"), Cow::from(""))
652            ))
653        );
654        assert_eq!(
655            md_label2dest("[text]: url \"title\"\nabc"),
656            Ok((
657                "\nabc",
658                (Cow::from("text"), Cow::from("url"), Cow::from("title"))
659            ))
660        );
661        assert_eq!(
662            md_label2dest("[text]: url\n\"title\"\nabc"),
663            Ok((
664                "\nabc",
665                (Cow::from("text"), Cow::from("url"), Cow::from("title"))
666            ))
667        );
668        assert_eq!(
669            md_label2dest("   [text]: url\n\"title\"\nabc"),
670            Ok((
671                "\nabc",
672                (Cow::from("text"), Cow::from("url"), Cow::from("title"))
673            ))
674        );
675        assert_eq!(
676            md_label2dest("abc[text]: url\n\"title\""),
677            Err(nom::Err::Error(nom::error::Error::new(
678                "abc[text]: url\n\"title\"",
679                ErrorKind::Tag
680            )))
681        );
682        assert_eq!(
683            md_label2dest("    [text]: url\n\"title\" abc"),
684            Err(nom::Err::Error(nom::error::Error::new(
685                " [text]: url\n\"title\" abc",
686                ErrorKind::Tag
687            )))
688        );
689        // Nested brackets.
690        assert_eq!(
691            md_label2dest("[text\\[i\\]]: ur(l)url\nabc"),
692            Ok((
693                "\nabc",
694                (Cow::from("text[i]"), Cow::from("ur(l)url"), Cow::from(""))
695            ))
696        );
697        // Nested but balanced not allowed for link labels.
698        assert_eq!(
699            md_label2dest("[text[i]]: ur(l)(url"),
700            Err(nom::Err::Error(nom::error::Error::new(
701                "[i]]: ur(l)(url",
702                ErrorKind::Tag
703            )))
704        );
705        // Whitespace can have one newline.
706        assert_eq!(
707            md_label2dest("[text]: \nurl"),
708            Ok(("", (Cow::from("text"), Cow::from("url"), Cow::from(""))))
709        );
710        // But only one newline is allowed.
711        assert_eq!(
712            md_label2dest("[text]: \n\nurl"),
713            Err(nom::Err::Error(nom::error::Error::new(
714                " \n\nurl",
715                ErrorKind::Verify
716            )))
717        );
718        assert_eq!(
719            md_label2dest("[text: url"),
720            Err(nom::Err::Error(nom::error::Error::new("", ErrorKind::Tag)))
721        );
722        assert_eq!(
723            md_label2dest("[text] url"),
724            Err(nom::Err::Error(nom::error::Error::new(
725                " url",
726                ErrorKind::Char
727            )))
728        );
729        assert_eq!(
730            md_label2dest("[text]: url \"link title\"\nabc"),
731            Ok((
732                "\nabc",
733                (Cow::from("text"), Cow::from("url"), Cow::from("link title"))
734            ))
735        );
736        assert_eq!(
737            md_label2dest("[text]: url \"link\ntitle\"\nabc"),
738            Ok((
739                "\nabc",
740                (
741                    Cow::from("text"),
742                    Cow::from("url"),
743                    Cow::from("link\ntitle")
744                )
745            ))
746        );
747        assert_eq!(
748            md_label2dest("[text]: url \"link\ntitle\"abc"),
749            Err(nom::Err::Error(nom::error::Error::new(
750                "abc",
751                ErrorKind::Char
752            )))
753        );
754        assert_eq!(
755            md_label2dest("[text]:\nurl \"link\ntitle\"\nabc"),
756            Ok((
757                "\nabc",
758                (
759                    Cow::from("text"),
760                    Cow::from("url"),
761                    Cow::from("link\ntitle")
762                )
763            ))
764        );
765        assert_eq!(
766            md_label2dest("[text]: url \"link\n\ntitle\"\nabc"),
767            Err(nom::Err::Error(nom::error::Error::new(
768                "\"link\n\ntitle\"\nabc",
769                ErrorKind::Char
770            )))
771        );
772        assert_eq!(
773            md_label2dest("[text]:\n\nurl \"link title\"\nabc"),
774            Err(nom::Err::Error(nom::error::Error::new(
775                "\n\nurl \"link title\"\nabc",
776                ErrorKind::Verify
777            )))
778        );
779        // [Example 23](https://spec.commonmark.org/0.30/#example-23)
780        assert_eq!(
781            md_label2dest(r#"[foo]: /bar\* "ti\*tle""#),
782            Ok((
783                "",
784                (Cow::from("foo"), Cow::from("/bar*"), Cow::from("ti*tle"))
785            ))
786        );
787    }
788
789    #[test]
790    fn test_md_link_text() {
791        assert_eq!(
792            md_link_text("[text](url)"),
793            Ok(("(url)", Cow::from("text")))
794        );
795        assert_eq!(
796            md_link_text("[text[i]](url)"),
797            Ok(("(url)", Cow::from("text[i]")))
798        );
799        assert_eq!(
800            md_link_text(r#"[text\[i\]](url)"#),
801            Ok(("(url)", Cow::from("text[i]")))
802        );
803        assert_eq!(
804            md_link_text("[text(url)"),
805            Err(nom::Err::Error(nom::error::Error::new("", ErrorKind::Tag)))
806        );
807        assert_eq!(
808            md_link_text(r#"[te\_xt](url)"#),
809            Ok(("(url)", Cow::from("te_xt")))
810        );
811    }
812
/// Exercises `md_link_label()`: on success the bracketed label is returned
/// and the input starting at the `:` is left as remainder.
#[test]
fn test_md_link_label() {
    // `(input, remaining input, expected label)`
    let accepted = [
        ("[text]: url", ": url", "text"),
        // Backslash-escaped brackets come back unescaped.
        (r#"[text\[i\]]: url"#, ": url", "text[i]"),
    ];
    for (input, rest, label) in accepted {
        assert_eq!(md_link_label(input), Ok((rest, Cow::from(label))));
    }

    // `(input, input position carried by the error, error kind)`
    let rejected = [
        // The closing bracket is missing.
        ("[text: url", "", ErrorKind::Tag),
        // A second, unescaped opening bracket: the error points at it.
        ("[t[ext: url", "[ext: url", ErrorKind::Tag),
    ];
    for (input, failed_at, kind) in rejected {
        assert_eq!(
            md_link_label(input),
            Err(nom::Err::Error(nom::error::Error::new(failed_at, kind)))
        );
    }
}
835
/// Exercises `md_link_destination()` with bare and `<...>`-enclosed
/// destinations; every case listed here must parse successfully.
#[test]
fn test_md_link_destination() {
    // `(input, remaining input, expected destination)`
    let accepted = [
        // A bare destination stops at whitespace, a newline or the end of
        // the input.
        ("url  abc", "  abc", "url"),
        ("url", "", "url"),
        ("url\nabc", "\nabc", "url"),
        // Angle brackets enclosing the destination are not part of it.
        ("<url>abc", "abc", "url"),
        // Escaped angle brackets and parentheses come back unescaped.
        (r#"<u\<r\>l>abc"#, "abc", r#"u<r>l"#),
        (r#"u\)r\(l abc"#, " abc", r#"u)r(l"#),
        // Balanced, unescaped parentheses are kept in a bare destination.
        (r#"u(r)l abc"#, " abc", r#"u(r)l"#),
        ("u(r)l\nabc", "\nabc", r#"u(r)l"#),
    ];
    for (input, rest, destination) in accepted {
        assert_eq!(
            md_link_destination(input),
            Ok((rest, Cow::from(destination)))
        );
    }
}
868
/// Exercises `md_parse_link_destination()`. Unlike `md_link_destination()`
/// the expected values below still contain the backslash escapes of the
/// input, i.e. the destination is returned as a raw slice.
#[test]
fn test_md_parse_link_destination() {
    // `(input, remaining input, expected raw destination)`
    let accepted = [
        // Destinations enclosed in angle brackets.
        ("<url>abc", "abc", "url"),
        (r#"<u\<r\>l>abc"#, "abc", r#"u\<r\>l"#),
        ("<url> abc", " abc", "url"),
        ("<url>\nabc", "\nabc", "url"),
        // Inside angle brackets a space is part of the destination.
        ("<url 2>abc", "abc", "url 2"),
        // A bare destination ends at the first space.
        ("url abc", " abc", "url"),
        ("<url(1)> abc", " abc", "url(1)"),
        (
            r#"<[1a]\[1b\](2a)\(2b\)\<3b\>{4a}\{4b\}> abc"#,
            " abc",
            r#"[1a]\[1b\](2a)\(2b\)\<3b\>{4a}\{4b\}"#,
        ),
        // Bare destinations with balanced parentheses.
        ("ur()l abc", " abc", "ur()l"),
        ("ur()l\nabc", "\nabc", "ur()l"),
        // Empty destinations are accepted, enclosed or not.
        ("<>abc", "abc", ""),
        ("<>\nabc", "\nabc", ""),
        ("url", "", "url"),
        ("", "", ""),
        ("\nabc", "\nabc", ""),
    ];
    for (input, rest, destination) in accepted {
        assert_eq!(md_parse_link_destination(input), Ok((rest, destination)));
    }
}
908
/// Exercises `md_escaped_str_transform()`: the whole input is consumed and
/// every backslash escape is replaced by the character it protects.
#[test]
fn test_md_escaped_str_transform() {
    // `(input, expected transformed string)`; the remainder is always empty.
    let cases = [
        ("", ""),
        // Different than the link destination version.
        ("   ", "   "),
        // Without backslashes the input passes through unchanged.
        (r#"abc`:<>abc"#, r#"abc`:<>abc"#),
        (r#"\<\>\\"#, r#"<>\"#),
        (r#"\(\)\\"#, r#"()\"#),
        // [Example 12](https://spec.commonmark.org/0.30/#example-12)
        (
            r#"\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\`\{\|\}\~"#,
            r###"!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"###,
        ),
    ];
    for (input, transformed) in cases {
        assert_eq!(
            md_escaped_str_transform(input),
            Ok(("", Cow::from(transformed)))
        );
    }
}
934
/// Exercises `md_link_title()` with titles delimited by parentheses, double
/// quotes and single quotes. The leading space and the delimiters are
/// consumed; escapes in the title come back unescaped.
#[test]
fn test_md_link_title() {
    // `(input, expected title)`; the remainder is always `abc`.
    let accepted = [
        // Similar to the
        // [Example 504](https://spec.commonmark.org/0.30/#example-504)
        (" (title)abc", "title"),
        // Balanced or escaped inner parentheses yield the same title.
        (" (ti(t)le)abc", "ti(t)le"),
        (r#" (ti\(t\)le)abc"#, "ti(t)le"),
        (r#" "1\\23\"4\'56"abc"#, r#"1\23"4'56"#),
        // A single newline inside the title is allowed.
        (" \"tu\nvwxy\"abc", "tu\nvwxy"),
        (" 'tu\nv\\\'wxy'abc", "tu\nv\'wxy"),
    ];
    for (input, title) in accepted {
        assert_eq!(md_link_title(input), Ok(("abc", Cow::from(title))));
    }

    // Two consecutive newlines inside the title are rejected with
    // `ErrorKind::Verify`; the error points behind the leading space.
    assert_eq!(
        md_link_title(" (ti\n\ntle)abc"),
        Err(nom::Err::Error(nom::error::Error::new(
            "(ti\n\ntle)abc",
            ErrorKind::Verify
        )))
    );
}
971
/// Exercises `md_parse_link_title()`. Unlike `md_link_title()` the expected
/// titles below still contain the backslash escapes of the input, i.e. the
/// title is returned as a raw slice.
#[test]
fn test_md_parse_link_title() {
    // `(input, expected raw title)`; the remainder is always `abc`.
    let accepted = [
        (" (title)abc", "title"),
        (" (ti(t)le)abc", "ti(t)le"),
        (r#" "1\\23\"4\'56"abc"#, r#"1\\23\"4\'56"#),
        // A single newline inside the title is allowed.
        (" \"tu\nvwxy\"abc", "tu\nvwxy"),
        (" 'tu\nv\\\'wxy'abc", "tu\nv\\\'wxy"),
    ];
    for (input, title) in accepted {
        assert_eq!(md_parse_link_title(input), Ok(("abc", title)));
    }

    // Two consecutive newlines inside the title are rejected with
    // `ErrorKind::Verify`; the error points behind the leading space.
    assert_eq!(
        md_parse_link_title(" (ti\n\ntle)abc"),
        Err(nom::Err::Error(nom::error::Error::new(
            "(ti\n\ntle)abc",
            ErrorKind::Verify
        )))
    );
}
/// Exercises `md_absolute_uri()`: accepted URIs, rejected inputs together
/// with the error they must produce, and whether the first output field is
/// a borrowed or an owned `Cow`.
#[test]
fn test_md_absolute_uri() {
    // Both output fields carry the URI unchanged.
    let (_, uri) = md_absolute_uri("http://domain.com").unwrap();
    assert_eq!(uri.0, Cow::Borrowed("http://domain.com"));
    assert_eq!(uri.1, Cow::Borrowed("http://domain.com"));

    let (_, uri) = md_absolute_uri("scheme:domain").unwrap();
    assert_eq!(uri.1, Cow::Borrowed("scheme:domain"));

    // `(input, input position carried by the error, error kind)`
    let rejected = [
        // The whole input must be a URI: trailing text is an error.
        ("scheme:domain abc", " abc", ErrorKind::Eof),
        // NOTE(review): presumably the one-letter scheme is too short.
        ("h:domain", "h:domain", ErrorKind::Verify),
        ("sche&me:domain", "&me:domain", ErrorKind::Tag),
        (
            "scheme+much+too.long......................:uri",
            "scheme+much+too.long......................:uri",
            ErrorKind::Verify,
        ),
        // The non-ASCII letter ends the scheme where a `:` is required.
        ("httpÜ:domain abc", "Ü:domain abc", ErrorKind::Tag),
        ("no colon", " colon", ErrorKind::Tag),
        ("scheme:domai>n", ">n", ErrorKind::Eof),
    ];
    for (input, failed_at, kind) in rejected {
        assert_eq!(
            md_absolute_uri(input),
            Err(nom::Err::Error(nom::error::Error::new(failed_at, kind)))
        );
    }

    // Without percent encoding the first field borrows from the input.
    let (_, uri) = md_absolute_uri("scheme:domain").unwrap();
    assert!(matches!(uri.0, Cow::Borrowed(..)));
    assert_eq!(uri.0, Cow::from("scheme:domain"));

    // `%25` comes back as `%`, held in an owned `Cow`.
    let (_, uri) = md_absolute_uri("scheme:domai%25n").unwrap();
    assert!(matches!(uri.0, Cow::Owned(..)));
    assert_eq!(uri.0, Cow::from("scheme:domai%n"));
}
1068
/// Exercises `md_email_address()`: the first output field is the address
/// borrowed from the input, the second one is an owned string carrying the
/// `mailto:` prefix.
#[test]
fn test_md_email_address() {
    // `(address, expected second field)`
    let accepted = [
        ("local@domain", "mailto:local@domain"),
        // Non-ASCII letters are accepted on both sides of the `@`.
        ("localÜ@domainÜ", "mailto:localÜ@domainÜ"),
        ("lo.cal@domain", "mailto:lo.cal@domain"),
    ];
    for (address, mailto) in accepted {
        let (_, parsed) = md_email_address(address).unwrap();
        assert!(matches!(parsed.0, Cow::Borrowed(..)));
        assert!(matches!(parsed.1, Cow::Owned(..)));
        assert_eq!(parsed.0, Cow::from(address));
        assert_eq!(parsed.1, Cow::from(mailto));
    }

    // Here parsing stops at `_main`, reported as `ErrorKind::Eof`.
    assert_eq!(
        md_email_address("lo_cal@do_main"),
        Err(nom::Err::Error(nom::error::Error::new(
            "_main",
            ErrorKind::Eof
        )))
    );
}
1097
1098    /*
1099    #[test]
1100    fn test_md_escaped() {
1101        use nom::IResult;
1102        use nom::bytes::complete::escaped;
1103        use nom::character::complete::one_of;
1104
1105        fn esc(s: &str) -> IResult<&str, &str> {
1106          escaped(nom::character::complete::none_of(r#"\<>"#), '\\', one_of(ESCAPABLE))(s)
1107        }
1108
1109        assert_eq!(esc("123\\>123\\<4>abc"), Ok((">abc", "123\\>123\\<4")));
1110    }
1111    */
1112}