parse_hyperlinks/parser/
markdown.rs

1//! This module implements parsers for Markdown hyperlinks.
2#![allow(dead_code)]
3#![allow(clippy::type_complexity)]
4
5use crate::parser::parse::LABEL_LEN_MAX;
6use crate::parser::percent_decode;
7use crate::parser::Link;
8use crate::take_until_unbalanced;
9use nom::branch::alt;
10use nom::bytes::complete::tag;
11use nom::character::complete::multispace1;
12use nom::combinator::*;
13use std::borrow::Cow;
14
/// The following characters are escapable in _link text_, _link label_, _link
/// destination_ and _link title_.
17const ESCAPABLE: &str = r###"!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"###;
18
19/// Wrapper around `md_text2dest()` that packs the result in
20/// `Link::Text2Dest`.
21pub fn md_text2dest_link(i: &str) -> nom::IResult<&str, Link> {
22    let (i, (te, de, ti)) = md_text2dest(i)?;
23    Ok((i, Link::Text2Dest(te, de, ti)))
24}
25
26/// Parses a Markdown _inline link_.
27///
28/// This parser expects to start at the beginning of the link `[` to succeed.
29/// ```
30/// use parse_hyperlinks::parser::Link;
31/// use parse_hyperlinks::parser::markdown::md_text2dest;
32/// use std::borrow::Cow;
33///
34/// assert_eq!(
35///   md_text2dest(r#"[text](<dest> "title")abc"#),
36///   Ok(("abc", (Cow::from("text"), Cow::from("dest"), Cow::from("title"))))
37/// );
38///
39/// assert_eq!(
40///   md_text2dest(r#"<scheme:dest>abc"#),
41///   Ok(("abc", (Cow::from("scheme:dest"), Cow::from("scheme:dest"), Cow::from(""))))
42/// );
43/// assert_eq!(
44///   md_text2dest(r#"<foo@dest>abc"#),
45///   Ok(("abc", (Cow::from("foo@dest"), Cow::from("mailto:foo@dest"), Cow::from(""))))
46/// );
47/// ```
48pub fn md_text2dest(i: &str) -> nom::IResult<&str, (Cow<str>, Cow<str>, Cow<str>)> {
49    alt((
50        // Parse autolink.
51        nom::sequence::delimited(
52            tag("<"),
53            map_parser(
54                nom::bytes::complete::take_till1(|c: char| {
55                    c.is_ascii_whitespace() || c == '>' || c == '<'
56                }),
57                alt((md_absolute_uri, md_email_address)),
58            ),
59            tag(">"),
60        ),
61        // Parse inline link.
62        map(
63            nom::sequence::tuple((md_link_text, md_link_destination_enclosed)),
64            |(a, (b, c))| (a, b, c),
65        ),
66    ))(i)
67}
68
69/// Wrapper around `md_label2dest()` that packs the result in
70/// `Link::Label2Dest`.
71pub fn md_label2dest_link(i: &str) -> nom::IResult<&str, Link> {
72    let (i, (l, d, t)) = md_label2dest(i)?;
73    Ok((i, Link::Label2Dest(l, d, t)))
74}
75
76/// Matches a Markdown _link reference definition_.
77///
78/// The caller must guarantee, that the parser starts at first character of the
79/// input or at the first character of a line. The parser consumes all bytes
80/// until the end of the line.
81/// ```
82/// use parse_hyperlinks::parser::Link;
83/// use parse_hyperlinks::parser::markdown::md_label2dest;
84/// use std::borrow::Cow;
85///
86/// assert_eq!(
87///   md_label2dest("   [label]: <destination> 'title'\nabc"),
88///   Ok(("\nabc", (Cow::from("label"), Cow::from("destination"), Cow::from("title"))))
89/// );
90/// ```
91///
92/// [CommonMark
93/// Spec](https://spec.commonmark.org/0.30/#link-reference-definition)\ A [link
94/// reference
95/// definition](https://spec.commonmark.org/0.30/#link-reference-definition)
96/// consists of a [link label](https://spec.commonmark.org/0.30/#link-label),
97/// optionally preceded by up to three spaces of indentation, followed by a
98/// colon (`:`), optional spaces or tabs (including up to one [line
99/// ending](https://spec.commonmark.org/0.30/#line-ending)), a [link
100/// destination](https://spec.commonmark.org/0.30/#link-destination), optional
101/// spaces or tabs (including up to one [line
102/// ending](https://spec.commonmark.org/0.30/#line-ending)), and an optional
103/// [link title](https://spec.commonmark.org/0.30/#link-title), which if it is
104/// present must be separated from the [link
105/// destination](https://spec.commonmark.org/0.30/#link-destination) by spaces
106/// or tabs. No further character may occur.
107///
108/// A [link reference
109/// definition](https://spec.commonmark.org/0.30/#link-reference-definition)
110/// does not correspond to a structural element of a document. Instead, it
111/// defines a label which can be used in [reference
112/// links](https://spec.commonmark.org/0.30/#reference-link) and reference-style
113/// [images](https://spec.commonmark.org/0.30/#images) elsewhere in the
114/// document. [Link reference
115/// definitions](https://spec.commonmark.org/0.30/#link-reference-definition)
116/// can come either before or after the links that use them.
117pub fn md_label2dest(i: &str) -> nom::IResult<&str, (Cow<str>, Cow<str>, Cow<str>)> {
118    // Consume up to three spaces.
119    let (i, _) = nom::bytes::complete::take_while_m_n(0, 3, |c| c == ' ')(i)?;
120    // Take label.
121    let (i, link_text) = md_link_label(i)?;
122    let (i, _) = nom::character::complete::char(':')(i)?;
123    // Take spaces.
124    let (i, _) = verify(nom::character::complete::multispace1, |s: &str| {
125        !s.contains("\n\n")
126    })(i)?;
127    // Take destination.
128    let (i, link_destination) = md_link_destination(i)?;
129    // Try, but do not fail.
130    let (i, link_title) = alt((
131        // Take link title.
132        md_link_title,
133        nom::combinator::success(Cow::from("")),
134    ))(i)?;
135
136    // Now consume as much whitespace as possible.
137    let (i, _) = nom::character::complete::space0(i)?;
138
139    // Check if there is newline coming. Do not consume.
140    if !i.is_empty() {
141        let _ = nom::character::complete::newline(i)?;
142    }
143
144    Ok((i, (link_text, link_destination, link_title)))
145}
146
147/// Wrapper around `md_text2label()` that packs the result in
148/// `Link::Text2Label`.
149pub fn md_text2label_link(i: &str) -> nom::IResult<&str, Link> {
150    let (i, (t, l)) = md_text2label(i)?;
151    Ok((i, Link::Text2Label(t, l)))
152}
153
154/// Parse a Markdown _reference link_.
155///
156/// There are three kinds of reference links: full, collapsed, and shortcut.
157/// 1. A full reference link consists of a link text immediately followed by a
158///    link label that matches a link reference definition elsewhere in the
159///    document.
160/// 2. A collapsed reference link consists of a link label that matches a link
161///    reference definition elsewhere in the document, followed by the string [].
162///    The contents of the first link label are parsed as inlines, which are used as
163///    the link’s text. The link’s URI and title are provided by the matching
164///    reference link definition. Thus, `[foo][]` is equivalent to `[foo][foo]`.
165/// 3. A shortcut reference link consists of a link label that matches a link
166///    reference definition elsewhere in the document and is not followed by [] or a
167///    link label. The contents of the first link label are parsed as inlines, which
168///    are used as the link’s text. The link’s URI and title are provided by the
169///    matching link reference definition. Thus, `[foo]` is equivalent to `[foo][]`.
170///
171/// This parser expects to start at the beginning of the link `[` to succeed.
172/// It should always run at last position after all other parsers.
173/// ```rust
174/// use parse_hyperlinks::parser::Link;
175/// use parse_hyperlinks::parser::markdown::md_text2label;
176/// use std::borrow::Cow;
177///
178/// assert_eq!(
179///   md_text2label("[link text][link label]abc"),
180///   Ok(("abc", (Cow::from("link text"), Cow::from("link label"))))
181/// );
182/// assert_eq!(
183///   md_text2label("[link text][]abc"),
184///   Ok(("abc", (Cow::from("link text"), Cow::from("link text"))))
185/// );
186/// assert_eq!(
187///   md_text2label("[link text]abc"),
188///   Ok(("abc", (Cow::from("link text"), Cow::from("link text"))))
189/// );
190/// ```
191pub fn md_text2label(i: &str) -> nom::IResult<&str, (Cow<str>, Cow<str>)> {
192    let (i, (link_text, link_label)) = alt((
193        nom::sequence::pair(md_link_text, md_link_label),
194        nom::combinator::map(nom::sequence::terminated(md_link_text, tag("[]")), |s| {
195            (s.clone(), s)
196        }),
197        nom::combinator::map(md_link_text, |s| (s.clone(), s)),
198    ))(i)?;
199
200    // Check that there is no `[` or `(` following. Do not consume.
201    if !i.is_empty() {
202        let _ = nom::character::complete::none_of("[(")(i)?;
203    }
204
205    Ok((i, (link_text, link_label)))
206}
207
208/// Parses _link text_.
209/// Brackets are allowed in the
210/// [link text](https://spec.commonmark.org/0.29/#link-text) only if (a) they are
211/// backslash-escaped or (b) they appear as a matched pair of brackets, with
212/// an open bracket `[`, a sequence of zero or more inlines, and a close
213/// bracket `]`.
214/// [CommonMark Spec](https://spec.commonmark.org/0.29/#link-text)
215pub(crate) fn md_link_text(i: &str) -> nom::IResult<&str, Cow<str>> {
216    nom::combinator::map_parser(
217        nom::sequence::delimited(tag("["), take_until_unbalanced('[', ']'), tag("]")),
218        md_escaped_str_transform,
219    )(i)
220}
221
222/// Parses a _link label_.
223/// A link label begins with a left bracket ([) and ends with the first right
224/// bracket (]) that is not backslash-escaped. Between these brackets there must
225/// be at least one non-whitespace character. Unescaped square bracket characters
226/// are not allowed inside the opening and closing square brackets of link
227/// labels. A link label can have at most 999 characters inside the square
228/// brackets (TODO).
229/// [CommonMark Spec](https://spec.commonmark.org/0.29/#link-label)
230fn md_link_label(i: &str) -> nom::IResult<&str, Cow<str>> {
231    nom::combinator::map_parser(
232        nom::combinator::verify(
233            nom::sequence::delimited(
234                tag("["),
235                nom::bytes::complete::escaped(
236                    nom::character::complete::none_of("\\[]"),
237                    '\\',
238                    nom::character::complete::one_of(ESCAPABLE),
239                ),
240                tag("]"),
241            ),
242            |l: &str| l.len() <= LABEL_LEN_MAX,
243        ),
244        md_escaped_str_transform,
245    )(i)
246}
247
248/// This is a wrapper around `md_parse_link_destination()`. It takes its result
249/// and removes the `\` before the escaped characters `ESCAPABLE`.
250pub(crate) fn md_link_destination(i: &str) -> nom::IResult<&str, Cow<str>> {
251    nom::combinator::map_parser(md_parse_link_destination, md_escaped_str_transform)(i)
252}
253
254/// A [link destination](https://spec.commonmark.org/0.30/#link-destination)
255/// consists of either
256///
257/// * a sequence of zero or more characters between an opening `<` and a
258/// closing `>` that contains no line endings or unescaped `<` or `>`
259/// characters, or
260/// * a nonempty sequence of characters that does not start with `<`, does not
261/// include [ASCII control
262/// characters](https://spec.commonmark.org/0.30/#ascii-control-character) or
263/// [space](https://spec.commonmark.org/0.30/#space) character, and includes
264/// parentheses only if (a) they are backslash-escaped or (b) they are part of a
265/// balanced pair of unescaped parentheses. (Implementations may impose limits
266/// on parentheses nesting to avoid performance issues, but at least three
267/// levels of nesting should be supported.)
268fn md_parse_link_destination(i: &str) -> nom::IResult<&str, &str> {
269    alt((
270        nom::sequence::delimited(
271            tag("<"),
272            nom::bytes::complete::escaped(
273                nom::character::complete::none_of(r#"\<>"#),
274                '\\',
275                nom::character::complete::one_of(ESCAPABLE),
276            ),
277            tag(">"),
278        ),
279        map(nom::bytes::complete::tag("<>"), |_| ""),
280        alt((
281            nom::bytes::complete::is_not(" \t\r\n"),
282            nom::combinator::success(""),
283        )),
284    ))(i)
285}
286
287/// Matches `md_link_destination` in parenthesis.
288pub(crate) fn md_link_destination_enclosed(i: &str) -> nom::IResult<&str, (Cow<str>, Cow<str>)> {
289    map_parser(
290        nom::sequence::delimited(tag("("), take_until_unbalanced('(', ')'), tag(")")),
291        nom::sequence::tuple((
292            md_link_destination,
293            alt((
294                // Take link title.
295                md_link_title,
296                nom::combinator::success(Cow::from("")),
297            )),
298        )),
299    )(i)
300}
301
302/// This is a wrapper around `md_parse_link_title()`. It takes its result
303/// and removes the `\` before the escaped characters `ESCAPABLE`.
304fn md_link_title(i: &str) -> nom::IResult<&str, Cow<str>> {
305    nom::combinator::map_parser(md_parse_link_title, md_escaped_str_transform)(i)
306}
307
308/// A link title is always preceded one or more whitespace inluding
309/// one newline.
310/// [CommonMark Spec](https://spec.commonmark.org/0.29/#link-title)
311/// A [link title](https://spec.commonmark.org/0.29/#link-title) consists of either
312///
313///  - a sequence of zero or more characters between straight double-quote
314///    characters (`"`), including a `"` character only if it is
315///    backslash-escaped, or
316///  - a sequence of zero or more characters between straight single-quote
317///    characters (`'`), including a `'` character only if it is
318///    backslash-escaped, or
319///  - a sequence of zero or more characters between matching parentheses
320///    (`(...)`), including a `(` or `)` character only if it is
321///    backslash-escaped.
322///
323///  Although [link titles](https://spec.commonmark.org/0.29/#link-title) may
324///  span multiple lines, they may not contain a [blank
325///  line](https://spec.commonmark.org/0.29/#blank-line).
326fn md_parse_link_title(i: &str) -> nom::IResult<&str, &str> {
327    nom::sequence::preceded(
328        verify(multispace1, |s: &str| !s.contains("\n\n")),
329        verify(
330            alt((
331                nom::sequence::delimited(tag("("), take_until_unbalanced('(', ')'), tag(")")),
332                nom::sequence::delimited(
333                    tag("'"),
334                    nom::bytes::complete::escaped(
335                        nom::character::complete::none_of(r#"\'"#),
336                        '\\',
337                        nom::character::complete::one_of(ESCAPABLE),
338                    ),
339                    tag("'"),
340                ),
341                nom::sequence::delimited(
342                    tag("\""),
343                    nom::bytes::complete::escaped(
344                        nom::character::complete::none_of(r#"\""#),
345                        '\\',
346                        nom::character::complete::one_of(ESCAPABLE),
347                    ),
348                    tag("\""),
349                ),
350            )),
351            |s: &str| !s.contains("\n\n"),
352        ),
353    )(i)
354}
355
356/// Remove the `\` before the escaped characters `ESCAPABLE`.
357fn md_escaped_str_transform(i: &str) -> nom::IResult<&str, Cow<str>> {
358    nom::combinator::map(
359        nom::bytes::complete::escaped_transform(
360            nom::bytes::complete::is_not("\\"),
361            '\\',
362            nom::character::complete::one_of(ESCAPABLE),
363        ),
364        |s| if s == i { Cow::from(i) } else { Cow::from(s) },
365    )(i)
366}
367
368/// Parses an [absolute URI](https://spec.commonmark.org/0.30/#absolute-uri).
369/// This parser consumes all input to succeed.
370/// An absolute URI, for these purposes, consists of a
371/// [scheme](https://spec.commonmark.org/0.30/#scheme) followed by a
372/// colon (`:`) followed by zero or more characters other [ASCII control
373/// characters](https://spec.commonmark.org/0.30/#ascii-control-character),
374/// [space](https://spec.commonmark.org/0.30/#space), `<`, and `>`. If the
375/// URI includes these characters, they must be percent-encoded (e.g. `%20`
376/// for a space).
377///
378/// For purposes of this spec, a
379/// [scheme](https://spec.commonmark.org/0.30/#scheme) is any sequence of
380/// 2–32 characters beginning with an ASCII letter and followed by any
381/// combination of ASCII letters, digits, or the symbols plus (”+”),
382/// period (”.”), or hyphen (”-”).
383///
384/// [CommonMark Spec](https://spec.commonmark.org/0.30/#autolinks)
385fn md_absolute_uri(i: &str) -> nom::IResult<&str, (Cow<str>, Cow<str>, Cow<str>)> {
386    let j = i;
387    map(
388        all_consuming(nom::sequence::separated_pair(
389            // Parse scheme.
390            verify(
391                nom::bytes::complete::take_till1(|c: char| {
392                    !(c.is_ascii_alphanumeric() || "+.-".contains(c))
393                }),
394                |s: &str| s.len() >= 2 && s.len() <= 32,
395            ),
396            tag(":"),
397            // Parse domain.
398            map_parser(
399                nom::bytes::complete::take_till1(|c: char| {
400                    c.is_ascii_control() || c.is_ascii_whitespace() || "<>".contains(c)
401                }),
402                percent_decode,
403            ),
404        )),
405        |(scheme, domain)| {
406            let uri = if matches!(domain, Cow::Borrowed(..)) {
407                Cow::Borrowed(j)
408            } else {
409                Cow::Owned(format!("{scheme}:{domain}"))
410            };
411            (uri.clone(), uri, Cow::from(""))
412        },
413    )(i)
414}
415
416/// Parses an Email address. This parser consumes all input to succeed.
417/// As it only checks and forwards, the result type is `Cow::Borrowed`.
418/// The check is not as strict but inspired by
419/// [HTML5 spec](https://html.spec.whatwg.org/multipage/forms.html#e-mail-state-(type=email)):
420/// and [CommonMark Spec](https://spec.commonmark.org/0.30/#email-autolink)
421/// The link’s label is the email address, and the
422/// URL is `mailto:` followed by the email address.
423///
424fn md_email_address(i: &str) -> nom::IResult<&str, (Cow<str>, Cow<str>, Cow<str>)> {
425    let j = i;
426    map(
427        all_consuming(nom::sequence::separated_pair(
428            // Parse scheme.
429            nom::bytes::complete::take_till1(|c: char| {
430                !(c.is_alphanumeric() || ".!#$%&'*+\\/=?^_`{|}~-".contains(c))
431            }),
432            tag("@"),
433            // Parse domain.
434            nom::bytes::complete::take_till1(|c: char| !(c.is_alphanumeric() || ".-".contains(c))),
435        )),
436        |(_, _)| {
437            (
438                Cow::Borrowed(j),
439                Cow::Owned(format!("mailto:{}", j.to_string())),
440                Cow::Borrowed(""),
441            )
442        },
443    )(i)
444}
445
446#[cfg(test)]
447mod tests {
448    use super::*;
449    use nom::error::ErrorKind;
450
451    #[test]
452    fn test_md_text2dest() {
453        assert_eq!(
454            md_text2dest("[text](url)abc"),
455            Ok(("abc", (Cow::from("text"), Cow::from("url"), Cow::from(""))))
456        );
457        assert_eq!(
458            md_text2dest("[text[i]](url)abc"),
459            Ok((
460                "abc",
461                (Cow::from("text[i]"), Cow::from("url"), Cow::from(""))
462            ))
463        );
464        assert_eq!(
465            md_text2dest("[text[i]](ur(l))abc"),
466            Ok((
467                "abc",
468                (Cow::from("text[i]"), Cow::from("ur(l)"), Cow::from(""))
469            ))
470        );
471        assert_eq!(
472            md_text2dest("[text(url)"),
473            Err(nom::Err::Error(nom::error::Error::new("", ErrorKind::Tag)))
474        );
475        assert_eq!(
476            md_text2dest("[text](<url>)abc"),
477            Ok(("abc", (Cow::from("text"), Cow::from("url"), Cow::from(""))))
478        );
479        assert_eq!(
480            md_text2dest("[text](<url> \"link title\")abc"),
481            Ok((
482                "abc",
483                (Cow::from("text"), Cow::from("url"), Cow::from("link title"))
484            ))
485        );
486        assert_eq!(
487            md_text2dest("[text](url \"link title\")abc"),
488            Ok((
489                "abc",
490                (Cow::from("text"), Cow::from("url"), Cow::from("link title"))
491            ))
492        );
493        // [Example 483](https://spec.commonmark.org/0.30/#example-483)
494        assert_eq!(
495            md_text2dest("[](./target.md)abc"),
496            Ok((
497                "abc",
498                (Cow::from(""), Cow::from("./target.md"), Cow::from(""))
499            ))
500        );
501        // [Example 484](https://spec.commonmark.org/0.30/#example-484)
502        assert_eq!(
503            md_text2dest("[link]()abc"),
504            Ok(("abc", (Cow::from("link"), Cow::from(""), Cow::from(""))))
505        );
506        // [Example 485](https://spec.commonmark.org/0.30/#example-485)
507        assert_eq!(
508            md_text2dest("[link](<>)abc"),
509            Ok(("abc", (Cow::from("link"), Cow::from(""), Cow::from(""))))
510        );
511        // [Example 486](https://spec.commonmark.org/0.30/#example-486)
512        assert_eq!(
513            md_text2dest("[]()abc"),
514            Ok(("abc", (Cow::from(""), Cow::from(""), Cow::from(""))))
515        );
516        assert_eq!(
517            md_text2dest("[text]abc"),
518            Err(nom::Err::Error(nom::error::Error::new(
519                "abc",
520                ErrorKind::Tag
521            )))
522        );
523        // [Example 597](https://spec.commonmark.org/0.30/#example-597)
524        assert_eq!(
525            md_text2dest("<a+b+c:d>abc"),
526            Ok((
527                "abc",
528                (Cow::from("a+b+c:d"), Cow::from("a+b+c:d"), Cow::from(""))
529            ))
530        );
531        //[Example 603](https://spec.commonmark.org/0.30/#example-603)
532        assert_eq!(
533            md_text2dest("<foo@bar.example.com>abc"),
534            Ok((
535                "abc",
536                (
537                    Cow::from("foo@bar.example.com"),
538                    Cow::from("mailto:foo@bar.example.com"),
539                    Cow::from("")
540                )
541            ))
542        );
543        assert_eq!(
544            md_text2dest("<foo.example.com>abc"),
545            Err(nom::Err::Error(nom::error::Error::new(
546                "<foo.example.com>abc",
547                ErrorKind::Tag
548            )))
549        );
550        // [Example 20](https://spec.commonmark.org/0.30/#example-20)
551        assert_eq!(
552            md_text2dest(r#"<http://example.com?find=\*>abc"#),
553            Ok((
554                "abc",
555                (
556                    Cow::from(r#"http://example.com?find=\*"#),
557                    Cow::from(r#"http://example.com?find=\*"#),
558                    Cow::from("")
559                )
560            ))
561        );
562        // [Example 22](https://spec.commonmark.org/0.30/#example-22)
563        assert_eq!(
564            md_text2dest(r#"[foo](/bar\* "ti\*tle")abc"#),
565            Ok((
566                "abc",
567                (Cow::from("foo"), Cow::from("/bar*"), Cow::from("ti*tle"))
568            ))
569        );
570    }
571
572    #[test]
573    fn test_md_text2label() {
574        assert_eq!(
575            md_text2label("[link text][link label]abc"),
576            Ok(("abc", (Cow::from("link text"), Cow::from("link label"))))
577        );
578        assert_eq!(
579            md_text2label("[link text][]abc"),
580            Ok(("abc", (Cow::from("link text"), Cow::from("link text"))))
581        );
582        assert_eq!(
583            md_text2label("[link text]abc"),
584            Ok(("abc", (Cow::from("link text"), Cow::from("link text"))))
585        );
586        assert_eq!(
587            md_text2label("[]abc"),
588            Ok(("abc", (Cow::from(""), Cow::from(""))))
589        );
590        assert_eq!(
591            md_text2label(""),
592            Err(nom::Err::Error(nom::error::Error::new("", ErrorKind::Tag)))
593        );
594        // Check end of input position.
595        assert_eq!(
596            md_text2label("[text]"),
597            Ok(("", (Cow::from("text"), Cow::from("text"))))
598        );
599        assert_eq!(
600            md_text2label("[text][text]"),
601            Ok(("", (Cow::from("text"), Cow::from("text"))))
602        );
603        assert_eq!(
604            md_text2label("[text][label url"),
605            Err(nom::Err::Error(nom::error::Error::new(
606                "[label url",
607                ErrorKind::NoneOf
608            )))
609        );
610        assert_eq!(
611            md_text2label("[text](url)abc"),
612            Err(nom::Err::Error(nom::error::Error::new(
613                "(url)abc",
614                ErrorKind::NoneOf
615            )))
616        );
617    }
618
619    #[test]
620    fn test_md_label2dest() {
621        assert_eq!(
622            md_label2dest("[text]: url\nabc"),
623            Ok((
624                "\nabc",
625                (Cow::from("text"), Cow::from("url"), Cow::from(""))
626            ))
627        );
628        assert_eq!(
629            md_label2dest("[text]: url  \nabc"),
630            Ok((
631                "\nabc",
632                (Cow::from("text"), Cow::from("url"), Cow::from(""))
633            ))
634        );
635        assert_eq!(
636            md_label2dest("[text]: <url url> \nabc"),
637            Ok((
638                "\nabc",
639                (Cow::from("text"), Cow::from("url url"), Cow::from(""))
640            ))
641        );
642        assert_eq!(
643            md_label2dest("[text]: url \"title\"\nabc"),
644            Ok((
645                "\nabc",
646                (Cow::from("text"), Cow::from("url"), Cow::from("title"))
647            ))
648        );
649        assert_eq!(
650            md_label2dest("[text]: url\n\"title\"\nabc"),
651            Ok((
652                "\nabc",
653                (Cow::from("text"), Cow::from("url"), Cow::from("title"))
654            ))
655        );
656        assert_eq!(
657            md_label2dest("   [text]: url\n\"title\"\nabc"),
658            Ok((
659                "\nabc",
660                (Cow::from("text"), Cow::from("url"), Cow::from("title"))
661            ))
662        );
663        assert_eq!(
664            md_label2dest("abc[text]: url\n\"title\""),
665            Err(nom::Err::Error(nom::error::Error::new(
666                "abc[text]: url\n\"title\"",
667                ErrorKind::Tag
668            )))
669        );
670        assert_eq!(
671            md_label2dest("    [text]: url\n\"title\" abc"),
672            Err(nom::Err::Error(nom::error::Error::new(
673                " [text]: url\n\"title\" abc",
674                ErrorKind::Tag
675            )))
676        );
677        // Nested brackets.
678        assert_eq!(
679            md_label2dest("[text\\[i\\]]: ur(l)url\nabc"),
680            Ok((
681                "\nabc",
682                (Cow::from("text[i]"), Cow::from("ur(l)url"), Cow::from(""))
683            ))
684        );
685        // Nested but balanced not allowed for link labels.
686        assert_eq!(
687            md_label2dest("[text[i]]: ur(l)(url"),
688            Err(nom::Err::Error(nom::error::Error::new(
689                "[i]]: ur(l)(url",
690                ErrorKind::Tag
691            )))
692        );
693        // Whitespace can have one newline.
694        assert_eq!(
695            md_label2dest("[text]: \nurl"),
696            Ok(("", (Cow::from("text"), Cow::from("url"), Cow::from(""))))
697        );
698        // But only one newline is allowed.
699        assert_eq!(
700            md_label2dest("[text]: \n\nurl"),
701            Err(nom::Err::Error(nom::error::Error::new(
702                " \n\nurl",
703                ErrorKind::Verify
704            )))
705        );
706        assert_eq!(
707            md_label2dest("[text: url"),
708            Err(nom::Err::Error(nom::error::Error::new("", ErrorKind::Tag)))
709        );
710        assert_eq!(
711            md_label2dest("[text] url"),
712            Err(nom::Err::Error(nom::error::Error::new(
713                " url",
714                ErrorKind::Char
715            )))
716        );
717        assert_eq!(
718            md_label2dest("[text]: url \"link title\"\nabc"),
719            Ok((
720                "\nabc",
721                (Cow::from("text"), Cow::from("url"), Cow::from("link title"))
722            ))
723        );
724        assert_eq!(
725            md_label2dest("[text]: url \"link\ntitle\"\nabc"),
726            Ok((
727                "\nabc",
728                (
729                    Cow::from("text"),
730                    Cow::from("url"),
731                    Cow::from("link\ntitle")
732                )
733            ))
734        );
735        assert_eq!(
736            md_label2dest("[text]: url \"link\ntitle\"abc"),
737            Err(nom::Err::Error(nom::error::Error::new(
738                "abc",
739                ErrorKind::Char
740            )))
741        );
742        assert_eq!(
743            md_label2dest("[text]:\nurl \"link\ntitle\"\nabc"),
744            Ok((
745                "\nabc",
746                (
747                    Cow::from("text"),
748                    Cow::from("url"),
749                    Cow::from("link\ntitle")
750                )
751            ))
752        );
753        assert_eq!(
754            md_label2dest("[text]: url \"link\n\ntitle\"\nabc"),
755            Err(nom::Err::Error(nom::error::Error::new(
756                "\"link\n\ntitle\"\nabc",
757                ErrorKind::Char
758            )))
759        );
760        assert_eq!(
761            md_label2dest("[text]:\n\nurl \"link title\"\nabc"),
762            Err(nom::Err::Error(nom::error::Error::new(
763                "\n\nurl \"link title\"\nabc",
764                ErrorKind::Verify
765            )))
766        );
767        // [Example 23](https://spec.commonmark.org/0.30/#example-23)
768        assert_eq!(
769            md_label2dest(r#"[foo]: /bar\* "ti\*tle""#),
770            Ok((
771                "",
772                (Cow::from("foo"), Cow::from("/bar*"), Cow::from("ti*tle"))
773            ))
774        );
775    }
776
777    #[test]
778    fn test_md_link_text() {
779        assert_eq!(
780            md_link_text("[text](url)"),
781            Ok(("(url)", Cow::from("text")))
782        );
783        assert_eq!(
784            md_link_text("[text[i]](url)"),
785            Ok(("(url)", Cow::from("text[i]")))
786        );
787        assert_eq!(
788            md_link_text(r#"[text\[i\]](url)"#),
789            Ok(("(url)", Cow::from("text[i]")))
790        );
791        assert_eq!(
792            md_link_text("[text(url)"),
793            Err(nom::Err::Error(nom::error::Error::new("", ErrorKind::Tag)))
794        );
795        assert_eq!(
796            md_link_text(r#"[te\_xt](url)"#),
797            Ok(("(url)", Cow::from("te_xt")))
798        );
799    }
800
801    #[test]
802    fn test_md_link_label() {
803        assert_eq!(
804            md_link_label("[text]: url"),
805            Ok((": url", Cow::from("text")))
806        );
807        assert_eq!(
808            md_link_label(r#"[text\[i\]]: url"#),
809            Ok((": url", Cow::from("text[i]")))
810        );
811        assert_eq!(
812            md_link_label("[text: url"),
813            Err(nom::Err::Error(nom::error::Error::new("", ErrorKind::Tag)))
814        );
815        assert_eq!(
816            md_link_label("[t[ext: url"),
817            Err(nom::Err::Error(nom::error::Error::new(
818                "[ext: url",
819                ErrorKind::Tag
820            )))
821        );
822    }
823
824    #[test]
825    fn test_md_link_destination() {
826        assert_eq!(
827            md_link_destination("url  abc"),
828            Ok(("  abc", Cow::from("url")))
829        );
830        assert_eq!(md_link_destination("url"), Ok(("", Cow::from("url"))));
831        assert_eq!(
832            md_link_destination("url\nabc"),
833            Ok(("\nabc", Cow::from("url")))
834        );
835        assert_eq!(
836            md_link_destination("<url>abc"),
837            Ok(("abc", Cow::from("url")))
838        );
839        assert_eq!(
840            md_link_destination(r#"<u\<r\>l>abc"#),
841            Ok(("abc", Cow::from(r#"u<r>l"#)))
842        );
843        assert_eq!(
844            md_link_destination(r#"u\)r\(l abc"#),
845            Ok((" abc", Cow::from(r#"u)r(l"#)))
846        );
847        assert_eq!(
848            md_link_destination(r#"u(r)l abc"#),
849            Ok((" abc", Cow::from(r#"u(r)l"#)))
850        );
851        assert_eq!(
852            md_link_destination("u(r)l\nabc"),
853            Ok(("\nabc", Cow::from(r#"u(r)l"#)))
854        );
855    }
856
857    #[test]
858    fn test_md_parse_link_destination() {
859        assert_eq!(md_parse_link_destination("<url>abc"), Ok(("abc", "url")));
860        assert_eq!(
861            md_parse_link_destination(r#"<u\<r\>l>abc"#),
862            Ok(("abc", r#"u\<r\>l"#))
863        );
864        assert_eq!(md_parse_link_destination("<url> abc"), Ok((" abc", "url")));
865        assert_eq!(
866            md_parse_link_destination("<url>\nabc"),
867            Ok(("\nabc", "url"))
868        );
869        assert_eq!(
870            md_parse_link_destination("<url 2>abc"),
871            Ok(("abc", "url 2"))
872        );
873        assert_eq!(md_parse_link_destination("url abc"), Ok((" abc", "url")));
874        assert_eq!(
875            md_parse_link_destination("<url(1)> abc"),
876            Ok((" abc", "url(1)"))
877        );
878        assert_eq!(
879            md_parse_link_destination(r#"<[1a]\[1b\](2a)\(2b\)\<3b\>{4a}\{4b\}> abc"#),
880            Ok((" abc", r#"[1a]\[1b\](2a)\(2b\)\<3b\>{4a}\{4b\}"#))
881        );
882        assert_eq!(
883            md_parse_link_destination("ur()l abc"),
884            Ok((" abc", "ur()l"))
885        );
886        assert_eq!(
887            md_parse_link_destination("ur()l\nabc"),
888            Ok(("\nabc", "ur()l"))
889        );
890        assert_eq!(md_parse_link_destination("<>abc"), Ok(("abc", "")));
891        assert_eq!(md_parse_link_destination("<>\nabc"), Ok(("\nabc", "")));
892        assert_eq!(md_parse_link_destination("url"), Ok(("", "url")));
893        assert_eq!(md_parse_link_destination(""), Ok(("", "")));
894        assert_eq!(md_parse_link_destination("\nabc"), Ok(("\nabc", "")));
895    }
896
897    #[test]
898    fn test_md_escaped_str_transform() {
899        assert_eq!(md_escaped_str_transform(""), Ok(("", Cow::from(""))));
900        // Different than the link destination version.
901        assert_eq!(md_escaped_str_transform("   "), Ok(("", Cow::from("   "))));
902        assert_eq!(
903            md_escaped_str_transform(r#"abc`:<>abc"#),
904            Ok(("", Cow::from(r#"abc`:<>abc"#)))
905        );
906        assert_eq!(
907            md_escaped_str_transform(r#"\<\>\\"#),
908            Ok(("", Cow::from(r#"<>\"#)))
909        );
910        assert_eq!(
911            md_escaped_str_transform(r#"\(\)\\"#),
912            Ok(("", Cow::from(r#"()\"#)))
913        );
914        // [Example 12](https://spec.commonmark.org/0.30/#example-12)
915        assert_eq!(
916            md_escaped_str_transform(
917                r#"\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\`\{\|\}\~"#
918            ),
919            Ok(("", Cow::from(r###"!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"###)))
920        );
921    }
922
923    #[test]
924    fn test_md_link_title() {
925        // Similar to the
926        // [Example 504](https://spec.commonmark.org/0.30/#example-504)
927        assert_eq!(
928            md_link_title(" (title)abc"),
929            Ok(("abc", Cow::from("title")))
930        );
931        assert_eq!(
932            md_link_title(" (ti(t)le)abc"),
933            Ok(("abc", Cow::from("ti(t)le")))
934        );
935        assert_eq!(
936            md_link_title(r#" (ti\(t\)le)abc"#),
937            Ok(("abc", Cow::from("ti(t)le")))
938        );
939        assert_eq!(
940            md_link_title(r#" "1\\23\"4\'56"abc"#),
941            Ok(("abc", Cow::from(r#"1\23"4'56"#)))
942        );
943        assert_eq!(
944            md_link_title(" \"tu\nvwxy\"abc"),
945            Ok(("abc", Cow::from("tu\nvwxy")))
946        );
947        assert_eq!(
948            md_link_title(" 'tu\nv\\\'wxy'abc"),
949            Ok(("abc", Cow::from("tu\nv\'wxy")))
950        );
951        assert_eq!(
952            md_link_title(" (ti\n\ntle)abc"),
953            Err(nom::Err::Error(nom::error::Error::new(
954                "(ti\n\ntle)abc",
955                ErrorKind::Verify
956            )))
957        );
958    }
959
960    #[test]
961    fn test_md_parse_link_title() {
962        assert_eq!(md_parse_link_title(" (title)abc"), Ok(("abc", "title")));
963        assert_eq!(md_parse_link_title(" (ti(t)le)abc"), Ok(("abc", "ti(t)le")));
964        assert_eq!(
965            md_parse_link_title(r#" "1\\23\"4\'56"abc"#),
966            Ok(("abc", r#"1\\23\"4\'56"#))
967        );
968        assert_eq!(
969            md_parse_link_title(" \"tu\nvwxy\"abc"),
970            Ok(("abc", "tu\nvwxy"))
971        );
972        assert_eq!(
973            md_parse_link_title(" 'tu\nv\\\'wxy'abc"),
974            Ok(("abc", "tu\nv\\\'wxy"))
975        );
976        assert_eq!(
977            md_parse_link_title(" (ti\n\ntle)abc"),
978            Err(nom::Err::Error(nom::error::Error::new(
979                "(ti\n\ntle)abc",
980                ErrorKind::Verify
981            )))
982        );
983    }
984    #[test]
985    fn test_md_absolute_uri() {
986        assert_eq!(
987            md_absolute_uri("http://domain.com").unwrap().1 .0,
988            Cow::Borrowed("http://domain.com")
989        );
990        assert_eq!(
991            md_absolute_uri("http://domain.com").unwrap().1 .1,
992            Cow::Borrowed("http://domain.com")
993        );
994        assert_eq!(
995            md_absolute_uri("scheme:domain").unwrap().1 .1,
996            Cow::Borrowed("scheme:domain")
997        );
998        assert_eq!(
999            md_absolute_uri("scheme:domain abc"),
1000            Err(nom::Err::Error(nom::error::Error::new(
1001                " abc",
1002                ErrorKind::Eof
1003            )))
1004        );
1005        assert_eq!(
1006            md_absolute_uri("h:domain"),
1007            Err(nom::Err::Error(nom::error::Error::new(
1008                "h:domain",
1009                ErrorKind::Verify
1010            )))
1011        );
1012        assert_eq!(
1013            md_absolute_uri("sche&me:domain"),
1014            Err(nom::Err::Error(nom::error::Error::new(
1015                "&me:domain",
1016                ErrorKind::Tag
1017            )))
1018        );
1019        assert_eq!(
1020            md_absolute_uri("scheme+much+too.long......................:uri"),
1021            Err(nom::Err::Error(nom::error::Error::new(
1022                "scheme+much+too.long......................:uri",
1023                ErrorKind::Verify
1024            )))
1025        );
1026        assert_eq!(
1027            md_absolute_uri("httpÜ:domain abc"),
1028            Err(nom::Err::Error(nom::error::Error::new(
1029                "Ü:domain abc",
1030                ErrorKind::Tag
1031            )))
1032        );
1033        assert_eq!(
1034            md_absolute_uri("no colon"),
1035            Err(nom::Err::Error(nom::error::Error::new(
1036                " colon",
1037                ErrorKind::Tag
1038            )))
1039        );
1040        assert_eq!(
1041            md_absolute_uri("scheme:domai>n"),
1042            Err(nom::Err::Error(nom::error::Error::new(
1043                ">n",
1044                ErrorKind::Eof
1045            )))
1046        );
1047
1048        let res = md_absolute_uri("scheme:domain").unwrap();
1049        assert!(matches!(res.1 .0, Cow::Borrowed(..)));
1050        assert_eq!(res.1 .0, Cow::from("scheme:domain"));
1051
1052        let res = md_absolute_uri("scheme:domai%25n").unwrap();
1053        assert!(matches!(res.1 .0, Cow::Owned(..)));
1054        assert_eq!(res.1 .0, Cow::from("scheme:domai%n"));
1055    }
1056
1057    #[test]
1058    fn test_md_email_address() {
1059        let res = md_email_address("local@domain").unwrap();
1060        assert!(matches!(res.1 .0, Cow::Borrowed(..)));
1061        assert!(matches!(res.1 .1, Cow::Owned(..)));
1062        assert_eq!(res.1 .0, Cow::from("local@domain"));
1063        assert_eq!(res.1 .1, Cow::from("mailto:local@domain"));
1064
1065        let res = md_email_address("localÜ@domainÜ").unwrap();
1066        assert!(matches!(res.1 .0, Cow::Borrowed(..)));
1067        assert!(matches!(res.1 .1, Cow::Owned(..)));
1068        assert_eq!(res.1 .0, Cow::from("localÜ@domainÜ"));
1069        assert_eq!(res.1 .1, Cow::from("mailto:localÜ@domainÜ"));
1070
1071        let res = md_email_address("lo.cal@domain").unwrap();
1072        assert!(matches!(res.1 .0, Cow::Borrowed(..)));
1073        assert!(matches!(res.1 .1, Cow::Owned(..)));
1074        assert_eq!(res.1 .0, Cow::from("lo.cal@domain"));
1075        assert_eq!(res.1 .1, Cow::from("mailto:lo.cal@domain"));
1076
1077        assert_eq!(
1078            md_email_address("lo_cal@do_main"),
1079            Err(nom::Err::Error(nom::error::Error::new(
1080                "_main",
1081                ErrorKind::Eof
1082            )))
1083        );
1084    }
1085
1086    /*
1087    #[test]
1088    fn test_md_escaped() {
1089        use nom::IResult;
1090        use nom::bytes::complete::escaped;
1091        use nom::character::complete::one_of;
1092
1093        fn esc(s: &str) -> IResult<&str, &str> {
1094          escaped(nom::character::complete::none_of(r#"\<>"#), '\\', one_of(ESCAPABLE))(s)
1095        }
1096
1097        assert_eq!(esc("123\\>123\\<4>abc"), Ok((">abc", "123\\>123\\<4")));
1098    }
1099    */
1100}