parse_hyperlinks/parser/
restructured_text.rs

1//! This module implements parsers for RestructuredText hyperlinks.
2#![allow(dead_code)]
3#![allow(clippy::type_complexity)]
4
5use crate::parser::Link;
6use crate::parser::parse::LABEL_LEN_MAX;
7use nom::branch::alt;
8use nom::bytes::complete::tag;
9use nom::combinator::*;
10use nom::{IResult, Parser};
11use std::borrow::Cow;
12
/// Set of characters that may follow a `\` to form an escape sequence.
///
/// Note: when this set is changed, the list of alternatives in
/// `rst_escaped_link_text_transform()` has to be changed accordingly.
const ESCAPABLE: &str = " `:<>_\\";
18
19/// Wrapper around `rst_text2dest()` that packs the result in
20/// `Link::Text2Dest`.
21pub fn rst_text2dest_link(i: &'_ str) -> nom::IResult<&'_ str, Link<'_>> {
22    let (i, (te, de, ti)) = rst_text2dest(i)?;
23    Ok((i, Link::Text2Dest(te, de, ti)))
24}
25
26/// Parse a RestructuredText _inline hyperlink_.
27///
28/// The parser expects to start at the link start (\`) to succeed.
29/// As rst does not know about link titles,
30/// the parser always returns an empty `link_title` as `Cow::Borrowed("")`
31/// ```
32/// use parse_hyperlinks::parser::Link;
33/// use parse_hyperlinks::parser::restructured_text::rst_text2dest;
34/// use std::borrow::Cow;
35///
36/// assert_eq!(
37///   rst_text2dest("`name <destination>`__abc"),
38///   Ok(("abc", (Cow::from("name"), Cow::from("destination"), Cow::from(""))))
39/// );
40/// ```
41/// A hyperlink reference may directly embed a destination URI or (since Docutils
42/// 0.11) a hyperlink reference within angle brackets `<>` as shown in the
43/// following example:
44/// ```rst
45/// abc `Python home page <http://www.python.org>`__ abc
46/// ```
47/// The bracketed URI must be preceded by whitespace and be the last text
48/// before the end string.
49pub fn rst_text2dest(
50    i: &'_ str,
51) -> nom::IResult<&'_ str, (Cow<'_, str>, Cow<'_, str>, Cow<'_, str>)> {
52    let (i, (ln, ld)) = rst_parse_text2target(true, false)(i)?;
53    let ln = rst_escaped_link_text_transform(ln)?.1;
54    let ld = rst_escaped_link_destination_transform(ld)?.1;
55
56    Ok((i, (ln, ld, Cow::Borrowed(""))))
57}
58
59/// Wrapper around `rst_textlabel2dest()` that packs the result in
60/// `Link::TextLabel2Dest`.
61pub fn rst_text_label2dest_link(i: &'_ str) -> nom::IResult<&'_ str, Link<'_>> {
62    let (i, (te, de, ti)) = rst_text_label2dest(i)?;
63    Ok((i, Link::TextLabel2Dest(te, de, ti)))
64}
65
66/// Parse a RestructuredText combined _inline hyperlink_ with _link reference definition_.
67///
68/// The parser expects to start at the link start (\`) to succeed.
69/// As rst does not know about link titles,
70/// the parser always returns an empty `link_title` as `Cow::Borrowed("")`.
71/// ```
72/// use parse_hyperlinks::parser::Link;
73/// use parse_hyperlinks::parser::restructured_text::rst_text_label2dest;
74/// use std::borrow::Cow;
75///
76/// assert_eq!(
77///   rst_text_label2dest("`name <destination>`_abc"),
78///   Ok(("abc", (Cow::from("name"), Cow::from("destination"), Cow::from(""))))
79/// );
80/// ```
81/// A hyperlink reference may directly embed a destination URI or (since Docutils
82/// 0.11) a hyperlink reference within angle brackets `<>` as shown in the
83/// following example:
84/// ```rst
85/// abc `Python home page <http://www.python.org>`_ abc
86/// ```
87/// The bracketed URI must be preceded by whitespace and be the last text
88/// before the end string.
89pub fn rst_text_label2dest(
90    i: &'_ str,
91) -> nom::IResult<&'_ str, (Cow<'_, str>, Cow<'_, str>, Cow<'_, str>)> {
92    let (i, (ln, ld)) = rst_parse_text2target(false, false)(i)?;
93    let ln = rst_escaped_link_text_transform(ln)?.1;
94    let ld = rst_escaped_link_destination_transform(ld)?.1;
95
96    Ok((i, (ln, ld, Cow::Borrowed(""))))
97}
98
99/// This parser finds rst links of type:
100///     `*<*>`__
101/// or:
102///     `*<*>`_
103///
104/// Escape sequences are recognized and skipped, but not replaced here.
105/// If `anonym==true`: it recognizes:
106///     `*<*>`__
107/// otherwise:
108///     `*<*>`_
109///
110/// If `label==true` (`target==label`): it recognizes
111///     `*<*_>`_?
112/// otherwise (`target==dest`):
113///     `*<*>`_?
114fn rst_parse_text2target(
115    anonym: bool,
116    label: bool,
117) -> impl Fn(&str) -> IResult<&str, (&str, &str)> {
118    move |i: &str| {
119        let (mut i, inner) = nom::sequence::delimited(
120            tag("`"),
121            nom::bytes::complete::escaped(
122                nom::character::complete::none_of(r#"\`"#),
123                '\\',
124                nom::character::complete::one_of(ESCAPABLE),
125            ),
126            tag("`_"),
127        )
128        .parse(i)?;
129
130        if anonym {
131            let (j, _) = nom::character::complete::char('_')(i)?;
132            i = j;
133        };
134
135        // Assure that the next char is not`_`.
136        if !i.is_empty() {
137            let _ = nom::combinator::not(nom::character::complete::char('_')).parse(i)?;
138        };
139
140        // From here on, we only deal with the inner result of the above.
141        // Take everything until the first unescaped `<`
142        let (inner_rest, link_text): (&str, &str) = nom::bytes::complete::escaped(
143            nom::character::complete::none_of(r#"\<"#),
144            '\\',
145            nom::character::complete::one_of(ESCAPABLE),
146        )(inner)?;
147        // Trim trailing whitespace.
148        let link_text = link_text.trim_end();
149
150        let (j, mut link_dest_label) = nom::sequence::delimited(
151            tag("<"),
152            nom::bytes::complete::escaped(
153                nom::character::complete::none_of(r#"\<>"#),
154                '\\',
155                nom::character::complete::one_of(ESCAPABLE),
156            ),
157            tag(">"),
158        )
159        .parse(inner_rest)?;
160
161        // Fail if there are bytes left between `>` and `\``.
162        let (_, _) = nom::combinator::eof(j)?;
163
164        // Now check if `link_dest_label` is what we are expecting (which depends
165        // on `label`).
166
167        // Fail if `link_dest_label` is empty.
168        let (_, _) = nom::combinator::not(nom::combinator::eof).parse(link_dest_label)?;
169
170        // Get last char.
171        let last_char_is_ = link_dest_label.is_char_boundary(link_dest_label.len() - 1)
172            && &link_dest_label[link_dest_label.len() - 1..] == "_";
173        // If (`label==true`), we expect trailing `_`, fail otherwise.
174        // If (`label==false`), we fail when there is a trailing `_`.
175        if (label && !last_char_is_) || (!label && last_char_is_) {
176            return Err(nom::Err::Error(nom::error::Error::new(
177                i,
178                nom::error::ErrorKind::Tag,
179            )));
180        };
181        // When label, strip trailing `_`.
182        if label {
183            link_dest_label = &link_dest_label[..link_dest_label.len() - 1];
184        };
185
186        Ok((i, (link_text, link_dest_label)))
187    }
188}
189
190/// Wrapper around `rst_text2dest()` that packs the result in
191/// `Link::Text2Dest`.
192pub fn rst_text2label_link(i: &'_ str) -> nom::IResult<&'_ str, Link<'_>> {
193    let (i, (te, la)) = rst_text2label(i)?;
194    Ok((i, Link::Text2Label(te, la)))
195}
196
197/// Parse a RestructuredText _reference link_.
198///
199/// The caller must guarantee, that
200/// * the parser is at the input start (no bytes exist before).
201/// * the preceding bytes are whitespaces or newline, _or_
202/// * the preceding bytes are whitespaces or newline, followed by one of: `([<'"`
203/// ```rust
204/// use parse_hyperlinks::parser::Link;
205/// use parse_hyperlinks::parser::restructured_text::rst_text2label;
206/// use std::borrow::Cow;
207///
208/// assert_eq!(
209///   rst_text2label("linktext_ abc"),
210///   Ok((" abc", (Cow::from("linktext"), Cow::from("linktext"))))
211/// );
212/// assert_eq!(
213///   rst_text2label("`link text`_ abc"),
214///   Ok((" abc", (Cow::from("link text"), Cow::from("link text"))))
215/// );
216/// assert_eq!(
217///   rst_text2label("`link text<link label_>`_ abc"),
218///   Ok((" abc", (Cow::from("link text"), Cow::from("link label"))))
219/// );
220/// assert_eq!(
221///   rst_text2label("`link text`__ abc"),
222///   Ok((" abc", (Cow::from("link text"), Cow::from("_"))))
223/// );
224/// ```
225///
226pub fn rst_text2label(i: &'_ str) -> nom::IResult<&'_ str, (Cow<'_, str>, Cow<'_, str>)> {
227    let (i, (te, la)) = rst_parse_text2label(i)?;
228    let te = rst_escaped_link_text_transform(te)?.1;
229    let la = rst_escaped_link_text_transform(la)?.1;
230
231    Ok((i, (te, la)))
232}
233
234/// Parses a _reference link_. (Doctree element `reference`).
235///
236/// Named hyperlink references:
237/// No start-string, end-string = `_.
238/// Start-string = "`", end-string = `\`_`. (Phrase references.)
239/// Anonymous hyperlink references:
240/// No start-string, end-string = `__`.
241/// Start-string = "`", end-string = `\`__`. (Phrase references.)
242///
243///
244/// Hyperlink references are indicated by a trailing underscore, "_", except for
245/// standalone hyperlinks which are recognized independently.
246///
247/// Important: before this parser try `rst_text2dest()` first!
248///
249/// The caller must guarantee, that either:
250/// * we are at the input start -or-
251/// * the byte just before was a whitespace (including newline)!
252///
253/// For named references in reStructuredText `link_text` and `link_label`
254/// are the same. By convention we return for anonymous references:
255/// `link_label='_'`.
256///
257/// The parser checks that this _reference link_ is followed by a whitespace
258/// without consuming it.
259///
260fn rst_parse_text2label(i: &str) -> nom::IResult<&str, (&str, &str)> {
261    let (mut i, (link_text, mut link_label)) = alt((
262        rst_parse_text2target(false, true),
263        nom::combinator::map(rst_parse_simple_label, |s| (s, s)),
264    ))
265    .parse(i)?;
266
267    // Is this an anonymous reference? Consume the second `_` also.
268    if let (j, Some(_)) = nom::combinator::opt(nom::character::complete::char('_')).parse(i)? {
269        link_label = "_";
270        i = j;
271    };
272
273    Ok((i, (link_text, link_label)))
274}
275
276/// Wrapper around `rst_label2dest()` that packs the result in
277/// `Link::Label2Dest`.
278pub fn rst_label2dest_link(i: &'_ str) -> nom::IResult<&'_ str, Link<'_>> {
279    let (i, (l, d, t)) = rst_label2dest(i)?;
280    Ok((i, Link::Label2Dest(l, d, t)))
281}
282
283/// Parse a reStructuredText _link reference definition_.
284///
285/// This parser consumes until the end of the line. As rst does not know about link titles,
286/// the parser always returns an empty `link_title` as `Cow::Borrowed("")`.
287/// ```
288/// use parse_hyperlinks::parser::Link;
289/// use parse_hyperlinks::parser::restructured_text::rst_label2dest;
290/// use std::borrow::Cow;
291///
292/// assert_eq!(
293///   rst_label2dest("   .. _`label`: destination\nabc"),
294///   Ok(("\nabc", (Cow::from("label"), Cow::from("destination"), Cow::from(""))))
295/// );
296/// assert_eq!(
297///   rst_label2dest("   .. __: destination\nabc"),
298///   Ok(("\nabc", (Cow::from("_"), Cow::from("destination"), Cow::from(""))))
299/// );
300/// assert_eq!(
301///   rst_label2dest("   __ destination\nabc"),
302///   Ok(("\nabc", (Cow::from("_"), Cow::from("destination"), Cow::from(""))))
303/// );
304/// ```
305/// Here some examples for link references:
306/// ```rst
307/// .. _Python home page: http://www.python.org
308/// .. _`Python: home page`: http://www.python.org
309/// ```
310/// See unit test `test_rst_label2dest()` for more examples.
311pub fn rst_label2dest(
312    i: &'_ str,
313) -> nom::IResult<&'_ str, (Cow<'_, str>, Cow<'_, str>, Cow<'_, str>)> {
314    let (i, (l, d)) = rst_label2target(false, i)?;
315    Ok((i, (l, d, Cow::from(""))))
316}
317
318/// Wrapper around `rst_label2label()` that packs the result in
319/// `Link::Label2Label`.
320pub fn rst_label2label_link(i: &'_ str) -> nom::IResult<&'_ str, Link<'_>> {
321    let (i, (l1, l2)) = rst_label2label(i)?;
322    Ok((i, Link::Label2Label(l1, l2)))
323}
324
325/// Parse a reStructuredText _link reference to link reference definition_.
326/// This type defines an alias (alternative name) for a link reference:
327/// ```
328/// use parse_hyperlinks::parser::Link;
329/// use parse_hyperlinks::parser::restructured_text::rst_label2label;
330/// use std::borrow::Cow;
331///
332/// assert_eq!(
333///   rst_label2label("   .. _`alt label`: `label`_\nabc"),
334///   Ok(("\nabc", (Cow::from("alt label"), Cow::from("label"))))
335/// );
336/// assert_eq!(
337///   rst_label2label("   .. __: label_\nabc"),
338///   Ok(("\nabc", (Cow::from("_"), Cow::from("label"))))
339/// );
340/// assert_eq!(
341///   rst_label2label("   __ label_\nabc"),
342///   Ok(("\nabc", (Cow::from("_"), Cow::from("label"))))
343/// );
344/// ```
345pub fn rst_label2label(i: &'_ str) -> nom::IResult<&'_ str, (Cow<'_, str>, Cow<'_, str>)> {
346    rst_label2target(true, i)
347}
348
349/// Parser for _link_reference_definitions_:
350/// * `label==false`:  the link is of type `Label2Dest`
351/// * `label==true`: the link is of type `Label2Label`
352fn rst_label2target(
353    label: bool,
354    i: &'_ str,
355) -> nom::IResult<&'_ str, (Cow<'_, str>, Cow<'_, str>)> {
356    let my_err = |_| {
357        nom::Err::Error(nom::error::Error::new(
358            i,
359            nom::error::ErrorKind::EscapedTransform,
360        ))
361    };
362
363    // If there is a block start? What kind of?
364    let (i, c, block_header_is__) =
365        if let (i, Some(c)) = nom::combinator::opt(rst_explicit_markup_block(".. ")).parse(i)? {
366            (i, c, false)
367        } else {
368            let (i, c) = rst_explicit_markup_block("__ ")(i)?;
369            (i, c, true)
370        };
371
372    let (source, target) = match c {
373        Cow::Borrowed(s) => {
374            let (_, (ls, lt)) = if !block_header_is__ {
375                rst_parse_label2target(label)(s)?
376            } else if label {
377                // This is supposed to be a label.
378                ("", ("_", rst_parse_simple_label(s)?.1))
379            } else {
380                // This is supposed to be a destination (url).
381                ("", ("_", s))
382            };
383            // If the target is a destination (not a label), the last char must not be `_`.
384            if !label {
385                let _ = nom::combinator::not(rst_parse_simple_label)
386                    .parse(lt)
387                    .map_err(my_err)?;
388            };
389            (
390                rst_escaped_link_text_transform(ls)?.1,
391                rst_escaped_link_destination_transform(lt)?.1,
392            )
393        }
394
395        Cow::Owned(strg) => {
396            let (_, (ls, lt)) = if !block_header_is__ {
397                rst_parse_label2target(label)(&strg).map_err(my_err)?
398            } else if label {
399                // This is supposed to be a label.
400                let s = rst_parse_simple_label(&strg).map_err(my_err)?.1;
401                ("", ("_", s))
402            } else {
403                // This is supposed to be a destination (url).
404                ("", ("_", strg.as_str()))
405            };
406            // If the target is a destination (not a label), the last char must not be `_`.
407            if !label {
408                let _ = nom::combinator::not(rst_parse_simple_label)
409                    .parse(lt)
410                    .map_err(my_err)?;
411            };
412            let ls = Cow::Owned(
413                rst_escaped_link_text_transform(ls)
414                    .map_err(my_err)?
415                    .1
416                    .to_string(),
417            );
418            let lt = Cow::Owned(
419                rst_escaped_link_destination_transform(lt)
420                    .map_err(my_err)?
421                    .1
422                    .to_string(),
423            );
424            (ls, lt)
425        }
426    };
427
428    // We do not need to consume whitespace until the end of the line,
429    // because `rst_explicit_markup_block()` had stripped the whitespace
430    // already.
431
432    Ok((i, (source, target)))
433}
434
435/// The parser recognizes `Label2Dest` links (`label==false`):
436///     _label: dest
437/// or `Label2Label` links (`label==true):
438///     _alt_label: label_
439/// It does not perform any escape character transformation.
440fn rst_parse_label2target(label: bool) -> impl Fn(&str) -> IResult<&str, (&str, &str)> {
441    move |i: &str| {
442        let (i, link_text) = alt((
443            nom::sequence::delimited(
444                tag("_`"),
445                nom::bytes::complete::escaped(
446                    nom::character::complete::none_of(r#"\`"#),
447                    '\\',
448                    nom::character::complete::one_of(ESCAPABLE),
449                ),
450                tag("`: "),
451            ),
452            nom::sequence::delimited(
453                tag("_"),
454                nom::bytes::complete::escaped(
455                    nom::character::complete::none_of(r#"\:"#),
456                    '\\',
457                    nom::character::complete::one_of(ESCAPABLE),
458                ),
459                tag(": "),
460            ),
461            nom::combinator::value("_", tag("__: ")),
462        ))
463        .parse(i)?;
464
465        let link_target = if label {
466            // The target is another label.
467            rst_parse_simple_label(i)?.1
468        } else {
469            // The target is a destination.
470            i
471        };
472
473        Ok(("", (link_text, link_target)))
474    }
475}
476
477/// This parser consumes a simple label:
478///     one_word_label_
479/// or
480///     `more words label`_
481fn rst_parse_simple_label(i: &str) -> nom::IResult<&str, &str> {
482    // Consumes and returns a word ending with `_`.
483    // Strips off one the trailing `_` before returning the result.
484    fn take_word_consume_first_ending_underscore(i: &str) -> nom::IResult<&str, &str> {
485        let mut i = i;
486        let (k, mut r) = nom::bytes::complete::take_till1(|c: char| {
487            !(c.is_alphanumeric() || c == '-' || c == '_')
488        })(i)?;
489        // Is `r` ending with `__`? There should be at least 2 bytes: `"__".len()`
490        if r.len() >= 3 && r.is_char_boundary(r.len() - 2) && &r[r.len() - 2..] == "__" {
491            // Consume one `_`, but keep one `_` in remaining bytes.
492            i = &i[r.len() - 1..];
493            // Strip two `__` from result.
494            r = &r[..r.len() - 2];
495        // Is `r` ending with `_`? There should be at least 1 byte: `"_".len()`.
496        } else if !r.is_empty() && r.is_char_boundary(r.len() - 1) && &r[r.len() - 1..] == "_" {
497            // Remaining bytes.
498            i = k;
499            // Strip `_` from result.
500            r = &r[..r.len() - 1]
501        } else {
502            return Err(nom::Err::Error(nom::error::Error::new(
503                k,
504                nom::error::ErrorKind::Tag,
505            )));
506        };
507
508        Ok((i, r))
509    }
510
511    let (i, r) = nom::combinator::verify(
512        alt((
513            nom::sequence::delimited(
514                tag("`"),
515                nom::bytes::complete::escaped(
516                    nom::character::complete::none_of(r#"\`"#),
517                    '\\',
518                    nom::character::complete::one_of(ESCAPABLE),
519                ),
520                tag("`_"),
521            ),
522            take_word_consume_first_ending_underscore,
523        )),
524        |s: &str| s.len() <= LABEL_LEN_MAX,
525    )
526    .parse(i)?;
527
528    // Return error if label is empty.
529    let _ = nom::combinator::not(alt((nom::combinator::eof, tag("``")))).parse(r)?;
530
531    Ok((i, r))
532}
533
534/// This parses an explicit markup block.
535/// The parser expects to start at the beginning of the line.
536/// Syntax diagram:
537/// ```text
538/// +-------+----------------------+
539/// | ".. " | in  1                |
540/// +-------+ in  2                |
541///         |    in  3             |
542///         +----------------------+
543/// out
544/// ```
545/// An explicit markup block is a text block:
546/// * whose first line begins with ".." followed by whitespace (the "explicit
547///   markup start"),
548/// * whose second and subsequent lines (if any) are indented relative to the
549///   first, and
550/// * which ends before an unintended line.
551///
552/// As with external hyperlink targets, the link block of an indirect
553/// hyperlink target may begin on the same line as the explicit markup start
554/// or the next line. It may also be split over multiple lines, in which case
555/// the lines are joined with whitespace before being normalized.
556fn rst_explicit_markup_block<'a>(
557    block_header: &'a str,
558) -> impl Fn(&'a str) -> IResult<&'a str, Cow<'a, str>> {
559    move |i: &'a str| {
560        fn indent<'a>(wsp1: &'a str, wsp2: &'a str) -> impl Fn(&'a str) -> IResult<&'a str, ()> {
561            move |i: &str| {
562                let (i, _) = nom::character::complete::line_ending(i)?;
563                let (i, _) = nom::bytes::complete::tag(wsp1)(i)?;
564                let (i, _) = nom::bytes::complete::tag(wsp2)(i)?;
565                Ok((i, ()))
566            }
567        }
568
569        let (i, (wsp1, wsp2)) = nom::sequence::pair(
570            nom::character::complete::space0,
571            nom::combinator::map(nom::bytes::complete::tag(block_header), |_| "   "),
572        )
573        .parse(i)?;
574
575        let (j, v) = nom::multi::separated_list1(
576            indent(wsp1, wsp2),
577            nom::character::complete::not_line_ending,
578        )
579        .parse(i)?;
580
581        // If the block consists of only one line return now.
582        if v.len() == 1 {
583            return Ok((j, Cow::Borrowed(v[0])));
584        };
585
586        let mut s = String::new();
587        let mut is_first = true;
588
589        for subs in &v {
590            if !is_first {
591                s.push(' ');
592            }
593            s.push_str(subs);
594            is_first = false;
595        }
596
597        Ok((j, Cow::from(s)))
598    }
599}
600
601/// Replace the following escaped characters:
602///     \\\`\ \:\<\>
603/// with:
604///     \`:<>
605/// Preserves usual whitespace, but removes `\ `.
606fn rst_escaped_link_text_transform(i: &'_ str) -> IResult<&'_ str, Cow<'_, str>> {
607    nom::combinator::map(
608        nom::bytes::complete::escaped_transform(
609            nom::bytes::complete::is_not("\\"),
610            '\\',
611            // This list is the same as `ESCAPABLE`.
612            alt((
613                tag("\\"),
614                tag("`"),
615                tag(":"),
616                tag("<"),
617                tag(">"),
618                tag("_"),
619                value("", tag(" ")),
620            )),
621        ),
622        |s| if s == i { Cow::from(i) } else { Cow::from(s) },
623    )
624    .parse(i)
625}
626
627/// Deletes all whitespace, but keeps one space for each `\ `.
628fn remove_whitespace(i: &'_ str) -> IResult<&'_ str, Cow<'_, str>> {
629    let mut res = Cow::Borrowed("");
630    let mut j = i;
631    while !j.is_empty() {
632        let (k, _) = nom::character::complete::multispace0(j)?;
633        let (k, s) = nom::bytes::complete::escaped(
634            nom::character::complete::none_of("\\\r\n \t"),
635            '\\',
636            nom::character::complete::one_of(r#" :`<>\"#),
637        )(k)?;
638        res = match res {
639            Cow::Borrowed("") => Cow::Borrowed(s),
640            Cow::Borrowed(res_str) => {
641                let mut strg = res_str.to_string();
642                strg.push_str(s);
643                Cow::Owned(strg)
644            }
645            Cow::Owned(mut strg) => {
646                strg.push_str(s);
647                Cow::Owned(strg)
648            }
649        };
650        j = k;
651    }
652
653    Ok((j, res))
654}
655
656/// Replace the following escaped characters:
657///     \\\`\ \:\<\>
658/// with:
659///     \` :<>
660fn rst_escaped_link_destination_transform(i: &'_ str) -> IResult<&'_ str, Cow<'_, str>> {
661    let my_err = |_| {
662        nom::Err::Error(nom::error::Error::new(
663            i,
664            nom::error::ErrorKind::EscapedTransform,
665        ))
666    };
667
668    let c = &*remove_whitespace(i)?.1;
669
670    let s = nom::bytes::complete::escaped_transform::<_, nom::error::Error<_>, _, _, _, _, _, _>(
671        nom::bytes::complete::is_not("\\"),
672        '\\',
673        nom::character::complete::one_of(ESCAPABLE),
674    )(c)
675    .map_err(my_err)?
676    .1;
677
678    // When nothing was changed we can continue with `Borrowed`.
679    if s == i {
680        Ok(("", Cow::Borrowed(i)))
681    } else {
682        Ok(("", Cow::Owned(s)))
683    }
684}
685
686#[cfg(test)]
687mod tests {
688    use super::*;
689    use nom::error::ErrorKind;
690
691    #[test]
692    fn test_rst_text2dest() {
693        let expected = (
694            "abc",
695            (
696                Cow::from("Python home page"),
697                Cow::from("http://www.python.org"),
698                Cow::from(""),
699            ),
700        );
701        assert_eq!(
702            rst_text2dest("`Python home page <http://www.python.org>`__abc").unwrap(),
703            expected
704        );
705
706        let expected = (
707            "abc",
708            (
709                Cow::from(r#"Python<home> page"#),
710                Cow::from("http://www.python.org"),
711                Cow::from(""),
712            ),
713        );
714        assert_eq!(
715            rst_text2dest(r#"`Python\ \<home\> page <http://www.python.org>`__abc"#).unwrap(),
716            expected
717        );
718
719        let expected = (
720            "abc",
721            (
722                Cow::from(r#"my news at <http://python.org>"#),
723                Cow::from("http://news.python.org"),
724                Cow::from(""),
725            ),
726        );
727        assert_eq!(
728            rst_text2dest(r#"`my news at \<http://python.org\> <http://news.python.org>`__abc"#)
729                .unwrap(),
730            expected
731        );
732
733        let expected = (
734            "abc",
735            (
736                Cow::from(r#"my news at <http://python.org>"#),
737                Cow::from(r#"http://news. <python>.org"#),
738                Cow::from(""),
739            ),
740        );
741        assert_eq!(
742            rst_text2dest(
743                r#"`my news at \<http\://python.org\> <http:// news.\ \<python\>.org>`__abc"#
744            )
745            .unwrap(),
746            expected
747        );
748    }
749
750    #[test]
751    fn test_rst_parse_text2dest_label() {
752        let expected = ("abc", ("Python home page", "http://www.python.org"));
753        assert_eq!(
754            rst_parse_text2target(false, false)("`Python home page <http://www.python.org>`_abc")
755                .unwrap(),
756            expected
757        );
758
759        let expected = nom::Err::Error(nom::error::Error::new("abc", ErrorKind::Tag));
760        assert_eq!(
761            rst_parse_text2target(false, false)("`Python home page <http://www.python.org_>`_abc")
762                .unwrap_err(),
763            expected
764        );
765
766        let expected = nom::Err::Error(nom::error::Error::new("", ErrorKind::Tag));
767        assert_eq!(
768            rst_parse_text2target(false, false)("`_abc").unwrap_err(),
769            expected
770        );
771
772        let expected = ("abc", ("Python home page", "http://www.python.org"));
773        assert_eq!(
774            rst_parse_text2target(true, false)("`Python home page <http://www.python.org>`__abc")
775                .unwrap(),
776            expected
777        );
778
779        let expected = ("abc", (r#"Python\ \<home\> page"#, "http://www.python.org"));
780        assert_eq!(
781            rst_parse_text2target(false, false)(
782                r#"`Python\ \<home\> page <http://www.python.org>`_abc"#
783            )
784            .unwrap(),
785            expected
786        );
787
788        let expected = (
789            "abc",
790            (
791                r#"my news at \<http://python.org\>"#,
792                "http://news.python.org",
793            ),
794        );
795        assert_eq!(
796            rst_parse_text2target(false, false)(
797                r#"`my news at \<http://python.org\> <http://news.python.org>`_abc"#
798            )
799            .unwrap(),
800            expected
801        );
802
803        let expected = (
804            "abc",
805            (
806                r#"my news at \<http\://python.org\>"#,
807                r#"http:// news.\ \<python\>.org"#,
808            ),
809        );
810        assert_eq!(
811            rst_parse_text2target(false, false)(
812                r#"`my news at \<http\://python.org\> <http:// news.\ \<python\>.org>`_abc"#
813            )
814            .unwrap(),
815            expected
816        );
817
818        let expected = (
819            "abc",
820            (
821                r#"my news at \<http\://python.org\>"#,
822                r#"http:// news.\ \<python\>.org"#,
823            ),
824        );
825        assert_eq!(
826            rst_parse_text2target(false, false)(
827                r#"`my news at \<http\://python.org\> <http:// news.\ \<python\>.org>`_abc"#
828            )
829            .unwrap(),
830            expected
831        );
832        let expected = ("abc", (r#"rst link text"#, "rst_link_label"));
833        assert_eq!(
834            rst_parse_text2target(false, true)(r#"`rst link text <rst_link_label_>`_abc"#).unwrap(),
835            expected
836        );
837
838        let expected = nom::Err::Error(nom::error::Error::new("abc", ErrorKind::Tag));
839        assert_eq!(
840            rst_parse_text2target(false, true)(r#"`my news <python webpage>`_abc"#).unwrap_err(),
841            expected
842        );
843    }
844
845    #[test]
846    fn test_rst_text2label() {
847        assert_eq!(
848            rst_text2label(r#"link_text_ abc"#),
849            Ok((" abc", (Cow::from("link_text"), Cow::from("link_text"))))
850        );
851        assert_eq!(
852            rst_text2label(r#"`li\:nk text`_ abc"#),
853            Ok((" abc", (Cow::from("li:nk text"), Cow::from("li:nk text"))))
854        );
855        assert_eq!(
856            rst_text2label("`link text`__ abc"),
857            Ok((" abc", (Cow::from("link text"), Cow::from("_"))))
858        );
859    }
860
861    #[test]
862    fn test_rst_parse_text2label() {
863        assert_eq!(
864            rst_parse_text2label("linktext_ abc"),
865            Ok((" abc", ("linktext", "linktext")))
866        );
867
868        assert_eq!(
869            rst_parse_text2label("linktext__ abc"),
870            Ok((" abc", ("linktext", "_")))
871        );
872
873        assert_eq!(
874            rst_parse_text2label("link_text_ abc"),
875            Ok((" abc", ("link_text", "link_text")))
876        );
877
878        assert_eq!(
879            rst_parse_text2label("`link text`_ abc"),
880            Ok((" abc", ("link text", "link text")))
881        );
882
883        assert_eq!(
884            rst_parse_text2label("`link text`_abc"),
885            Ok(("abc", ("link text", "link text")))
886        );
887
888        assert_eq!(
889            rst_parse_text2label("`link_text`_ abc"),
890            Ok((" abc", ("link_text", "link_text")))
891        );
892
893        assert_eq!(
894            rst_parse_text2label("`link text`__ abc"),
895            Ok((" abc", ("link text", "_")))
896        );
897
898        assert_eq!(
899            rst_parse_text2label("`link text<link label_>`_ abc"),
900            Ok((" abc", ("link text", "link label")))
901        );
902    }
903
904    #[test]
905    fn test_rst_label2dest() {
906        let expected = (
907            "\nabc",
908            (
909                Cow::from("Python: home page"),
910                Cow::from("http://www.python.org"),
911                Cow::from(""),
912            ),
913        );
914        assert_eq!(
915            rst_label2dest(".. _`Python: home page`: http://www.python.org\nabc").unwrap(),
916            expected
917        );
918        assert_eq!(
919            rst_label2dest("  .. _`Python: home page`: http://www.py\n     thon.org    \nabc")
920                .unwrap(),
921            expected
922        );
923
924        let expected = nom::Err::Error(nom::error::Error::new(
925            "x .. _`Python: home page`: http://www.python.org\nabc",
926            ErrorKind::Tag,
927        ));
928        assert_eq!(
929            rst_label2dest("x .. _`Python: home page`: http://www.python.org\nabc").unwrap_err(),
930            expected
931        );
932
933        let expected = (
934            "",
935            (
936                Cow::from("Python: `home page`"),
937                Cow::from("http://www.python .org"),
938                Cow::from(""),
939            ),
940        );
941        assert_eq!(
942            rst_label2dest(r#".. _Python\: \`home page\`: http://www.python\ .org"#).unwrap(),
943            expected
944        );
945        assert_eq!(
946            rst_label2dest(r#".. _`Python: \`home page\``: http://www.python\ .org"#).unwrap(),
947            expected
948        );
949
950        let expected = (
951            "",
952            (
953                Cow::from("my news at <http://python.org>"),
954                Cow::from("http://news.python.org"),
955                Cow::from(""),
956            ),
957        );
958        assert_eq!(
959            rst_label2dest(r#".. _`my news at <http://python.org>`: http://news.python.org"#)
960                .unwrap(),
961            expected
962        );
963        assert_eq!(
964            rst_label2dest(r#".. _`my news at \<http://python.org\>`: http://news.python.org"#)
965                .unwrap(),
966            expected
967        );
968        assert_eq!(
969            rst_label2dest(r#".. _my news at \<http\://python.org\>: http://news.python.org"#)
970                .unwrap(),
971            expected
972        );
973
974        let expected = (
975            "",
976            (
977                Cow::from("my news"),
978                Cow::from("http://news.<python>.org"),
979                Cow::from(""),
980            ),
981        );
982        assert_eq!(
983            rst_label2dest(r#".. _my news: http://news.<python>.org"#).unwrap(),
984            expected
985        );
986        assert_eq!(
987            rst_label2dest(r#".. _my news: http://news.\<python\>.org"#).unwrap(),
988            expected
989        );
990
991        let expected = (
992            "",
993            (
994                Cow::from("_"),
995                Cow::from("http://news.python.org"),
996                Cow::from(""),
997            ),
998        );
999        assert_eq!(
1000            rst_label2dest(r#".. __: http://news.python.org"#).unwrap(),
1001            expected
1002        );
1003        assert_eq!(
1004            rst_label2dest(r#"__ http://news.python.org"#).unwrap(),
1005            expected
1006        );
1007        assert_eq!(
1008            rst_label2dest(".. _label: `link destination`_").unwrap_err(),
1009            nom::Err::Error(nom::error::Error::new(
1010                ".. _label: `link destination`_",
1011                ErrorKind::EscapedTransform
1012            )),
1013        );
1014        assert_eq!(
1015            rst_label2dest("__ link_destination_").unwrap_err(),
1016            nom::Err::Error(nom::error::Error::new(
1017                "__ link_destination_",
1018                ErrorKind::EscapedTransform
1019            )),
1020        );
1021    }
1022
1023    #[test]
1024    fn test_rst_label2label() {
1025        assert_eq!(
1026            rst_label2label("   .. _`alt label`: `label`_\nabc"),
1027            Ok(("\nabc", (Cow::from("alt label"), Cow::from("label"))))
1028        );
1029        assert_eq!(
1030            rst_label2label("   .. __: label_\nabc"),
1031            Ok(("\nabc", (Cow::from("_"), Cow::from("label"))))
1032        );
1033        assert_eq!(
1034            rst_label2label("   __ label_\nabc"),
1035            Ok(("\nabc", (Cow::from("_"), Cow::from("label"))))
1036        );
1037        assert_eq!(
1038            rst_label2label("_label: label").unwrap_err(),
1039            nom::Err::Error(nom::error::Error::new("_label: label", ErrorKind::Tag)),
1040        );
1041        assert_eq!(
1042            rst_label2label("__ destination").unwrap_err(),
1043            nom::Err::Error(nom::error::Error::new("", ErrorKind::Tag)),
1044        );
1045    }
1046
1047    #[test]
1048    fn test_rst_parse_label2target() {
1049        let expected = ("", ("Python home page", "http://www.python.org"));
1050        assert_eq!(
1051            rst_parse_label2target(false)("_Python home page: http://www.python.org").unwrap(),
1052            expected
1053        );
1054        assert_eq!(
1055            rst_parse_label2target(false)("_`Python home page`: http://www.python.org").unwrap(),
1056            expected
1057        );
1058
1059        let expected = ("", ("Python: home page", "http://www.python.org"));
1060        assert_eq!(
1061            rst_parse_label2target(false)("_`Python: home page`: http://www.python.org").unwrap(),
1062            expected
1063        );
1064
1065        let expected = ("", (r#"Python\: home page"#, "http://www.python.org"));
1066        assert_eq!(
1067            rst_parse_label2target(false)(r#"_Python\: home page: http://www.python.org"#).unwrap(),
1068            expected
1069        );
1070
1071        let expected = (
1072            "",
1073            ("my news at <http://python.org>", "http://news.python.org"),
1074        );
1075        assert_eq!(
1076            rst_parse_label2target(false)(
1077                r#"_`my news at <http://python.org>`: http://news.python.org"#
1078            )
1079            .unwrap(),
1080            expected
1081        );
1082
1083        let expected = (
1084            "",
1085            (
1086                r#"my news at \<http://python.org\>"#,
1087                "http://news.python.org",
1088            ),
1089        );
1090        assert_eq!(
1091            rst_parse_label2target(false)(
1092                r#"_`my news at \<http://python.org\>`: http://news.python.org"#
1093            )
1094            .unwrap(),
1095            expected
1096        );
1097
1098        let expected = (
1099            "",
1100            (
1101                r#"my news at \<http\://python.org\>"#,
1102                "http://news.python.org",
1103            ),
1104        );
1105        assert_eq!(
1106            rst_parse_label2target(false)(
1107                r#"_my news at \<http\://python.org\>: http://news.python.org"#
1108            )
1109            .unwrap(),
1110            expected
1111        );
1112
1113        let expected = ("", ("_", "http://news.python.org"));
1114        assert_eq!(
1115            rst_parse_label2target(false)(r#"__: http://news.python.org"#).unwrap(),
1116            expected
1117        );
1118
1119        let expected = ("", ("alt_label", "one_word_label"));
1120        assert_eq!(
1121            rst_parse_label2target(true)("_alt_label: one_word_label_").unwrap(),
1122            expected
1123        );
1124
1125        let expected = ("", ("alt label", "more words label"));
1126        assert_eq!(
1127            rst_parse_label2target(true)("_`alt label`: `more words label`_").unwrap(),
1128            expected
1129        );
1130    }
1131
1132    #[test]
1133    fn test_parse_simple_label() {
1134        let expected = ("", "one_word_label");
1135        assert_eq!(rst_parse_simple_label("one_word_label_").unwrap(), expected);
1136
1137        let expected = (" abc", "one_word_label");
1138        assert_eq!(
1139            rst_parse_simple_label("one_word_label_ abc").unwrap(),
1140            expected
1141        );
1142        assert_eq!(
1143            rst_parse_simple_label("`one_word_label`_ abc").unwrap(),
1144            expected
1145        );
1146
1147        let expected = ("", "more words label");
1148        assert_eq!(
1149            rst_parse_simple_label("`more words label`_").unwrap(),
1150            expected
1151        );
1152
1153        let expected = (". abc", "more words label");
1154        assert_eq!(
1155            rst_parse_simple_label("`more words label`_. abc").unwrap(),
1156            expected
1157        );
1158
1159        let expected = ("? abc", "more words label");
1160        assert_eq!(
1161            rst_parse_simple_label("`more words label`_? abc").unwrap(),
1162            expected
1163        );
1164
1165        let expected = (" abc", "more words label");
1166        assert_eq!(
1167            rst_parse_simple_label("`more words label`_ abc").unwrap(),
1168            expected
1169        );
1170
1171        assert_eq!(
1172            rst_parse_simple_label("_").unwrap_err(),
1173            nom::Err::Error(nom::error::Error::new("", ErrorKind::Not)),
1174        );
1175
1176        assert_eq!(
1177            rst_parse_simple_label("``_").unwrap_err(),
1178            nom::Err::Error(nom::error::Error::new("``_", ErrorKind::TakeTill1)),
1179        );
1180    }
1181
1182    #[test]
1183    fn test_rst_explicit_markup_block() {
1184        assert_eq!(
1185            rst_explicit_markup_block(".. ")(".. 11111"),
1186            Ok(("", Cow::from("11111")))
1187        );
1188        assert_eq!(
1189            rst_explicit_markup_block(".. ")("   .. 11111\nout"),
1190            Ok(("\nout", Cow::from("11111")))
1191        );
1192        assert_eq!(
1193            rst_explicit_markup_block(".. ")("   .. 11111\n      222222\n      333333\nout"),
1194            Ok(("\nout", Cow::from("11111 222222 333333")))
1195        );
1196        assert_eq!(
1197            rst_explicit_markup_block(".. ")("   .. first\n      second\n       1indent\nout"),
1198            Ok(("\nout", Cow::from("first second  1indent")))
1199        );
1200        assert_eq!(
1201            rst_explicit_markup_block(".. ")("   ..first"),
1202            Err(nom::Err::Error(nom::error::Error::new(
1203                "..first",
1204                ErrorKind::Tag
1205            )))
1206        );
1207        assert_eq!(
1208            rst_explicit_markup_block(".. ")("x  .. first"),
1209            Err(nom::Err::Error(nom::error::Error::new(
1210                "x  .. first",
1211                ErrorKind::Tag
1212            )))
1213        );
1214    }
1215
1216    #[test]
1217    fn test_rst_escaped_link_text_transform() {
1218        assert_eq!(rst_escaped_link_text_transform(""), Ok(("", Cow::from(""))));
1219        // Different than the link destination version.
1220        assert_eq!(
1221            rst_escaped_link_text_transform("   "),
1222            Ok(("", Cow::from("   ")))
1223        );
1224        // Different than the link destination version.
1225        assert_eq!(
1226            rst_escaped_link_text_transform(r#"\ \ \ "#),
1227            Ok(("", Cow::from("")))
1228        );
1229        assert_eq!(
1230            rst_escaped_link_text_transform(r#"abc`:<>abc"#),
1231            Ok(("", Cow::from(r#"abc`:<>abc"#)))
1232        );
1233        assert_eq!(
1234            rst_escaped_link_text_transform(r#"\:\`\<\>\\"#),
1235            Ok(("", Cow::from(r#":`<>\"#)))
1236        );
1237    }
1238
1239    #[test]
1240    fn test_rst_escaped_link_destination_transform() {
1241        assert_eq!(
1242            rst_escaped_link_destination_transform(""),
1243            Ok(("", Cow::Borrowed("")))
1244        );
1245        // Different than the link name version.
1246        assert_eq!(
1247            rst_escaped_link_destination_transform("  "),
1248            Ok(("", Cow::Borrowed("")))
1249        );
1250        assert_eq!(
1251            rst_escaped_link_destination_transform(" x x"),
1252            Ok(("", Cow::Owned("xx".to_string())))
1253        );
1254        // Different than the link name version.
1255        assert_eq!(
1256            rst_escaped_link_destination_transform(r#"\ \ \ "#),
1257            Ok(("", Cow::Owned("   ".to_string())))
1258        );
1259        assert_eq!(
1260            rst_escaped_link_destination_transform(r#"abc`:<>abc"#),
1261            Ok(("", Cow::Borrowed(r#"abc`:<>abc"#)))
1262        );
1263        assert_eq!(
1264            rst_escaped_link_destination_transform(r#"\:\`\<\>\\"#),
1265            Ok(("", Cow::Owned(r#":`<>\"#.to_string())))
1266        );
1267    }
1268    #[test]
1269    fn test_remove_whitespace() {
1270        assert_eq!(remove_whitespace(" abc "), Ok(("", Cow::Borrowed("abc"))));
1271        assert_eq!(
1272            remove_whitespace(" x x"),
1273            Ok(("", Cow::Owned("xx".to_string())))
1274        );
1275        assert_eq!(remove_whitespace("  \t \r \n"), Ok(("", Cow::from(""))));
1276        assert_eq!(
1277            remove_whitespace(r#"\ \ \ "#),
1278            Ok(("", Cow::Borrowed(r#"\ \ \ "#)))
1279        );
1280        assert_eq!(
1281            remove_whitespace(r#"abc`:<>abc"#),
1282            Ok(("", Cow::Borrowed(r#"abc`:<>abc"#)))
1283        );
1284        assert_eq!(
1285            remove_whitespace(r#"\:\`\<\>\\"#),
1286            Ok(("", Cow::Borrowed(r#"\:\`\<\>\\"#)))
1287        );
1288
1289        assert_eq!(
1290            remove_whitespace("http://www.py\n     thon.org"),
1291            Ok(("", Cow::Owned("http://www.python.org".to_string())))
1292        );
1293    }
1294}