parse_hyperlinks/parser/
restructured_text.rs

1//! This module implements parsers for RestructuredText hyperlinks.
2#![allow(dead_code)]
3#![allow(clippy::type_complexity)]
4
5use crate::parser::parse::LABEL_LEN_MAX;
6use crate::parser::Link;
7use nom::branch::alt;
8use nom::bytes::complete::tag;
9use nom::combinator::*;
10use nom::IResult;
11use std::borrow::Cow;
12
/// Characters that can be escaped with `\`.
///
/// Note: If you ever change this, also change
/// `rst_escaped_link_text_transform()`, which lists the same characters.
const ESCAPABLE: &str = r#" `:<>_\"#;
18
19/// Wrapper around `rst_text2dest()` that packs the result in
20/// `Link::Text2Dest`.
21pub fn rst_text2dest_link(i: &str) -> nom::IResult<&str, Link> {
22    let (i, (te, de, ti)) = rst_text2dest(i)?;
23    Ok((i, Link::Text2Dest(te, de, ti)))
24}
25
26/// Parse a RestructuredText _inline hyperlink_.
27///
28/// The parser expects to start at the link start (\`) to succeed.
29/// As rst does not know about link titles,
30/// the parser always returns an empty `link_title` as `Cow::Borrowed("")`
31/// ```
32/// use parse_hyperlinks::parser::Link;
33/// use parse_hyperlinks::parser::restructured_text::rst_text2dest;
34/// use std::borrow::Cow;
35///
36/// assert_eq!(
37///   rst_text2dest("`name <destination>`__abc"),
38///   Ok(("abc", (Cow::from("name"), Cow::from("destination"), Cow::from(""))))
39/// );
40/// ```
41/// A hyperlink reference may directly embed a destination URI or (since Docutils
42/// 0.11) a hyperlink reference within angle brackets `<>` as shown in the
43/// following example:
44/// ```rst
45/// abc `Python home page <http://www.python.org>`__ abc
46/// ```
47/// The bracketed URI must be preceded by whitespace and be the last text
48/// before the end string.
49pub fn rst_text2dest(i: &str) -> nom::IResult<&str, (Cow<str>, Cow<str>, Cow<str>)> {
50    let (i, (ln, ld)) = rst_parse_text2target(true, false)(i)?;
51    let ln = rst_escaped_link_text_transform(ln)?.1;
52    let ld = rst_escaped_link_destination_transform(ld)?.1;
53
54    Ok((i, (ln, ld, Cow::Borrowed(""))))
55}
56
57/// Wrapper around `rst_textlabel2dest()` that packs the result in
58/// `Link::TextLabel2Dest`.
59pub fn rst_text_label2dest_link(i: &str) -> nom::IResult<&str, Link> {
60    let (i, (te, de, ti)) = rst_text_label2dest(i)?;
61    Ok((i, Link::TextLabel2Dest(te, de, ti)))
62}
63
64/// Parse a RestructuredText combined _inline hyperlink_ with _link reference definition_.
65///
66/// The parser expects to start at the link start (\`) to succeed.
67/// As rst does not know about link titles,
68/// the parser always returns an empty `link_title` as `Cow::Borrowed("")`.
69/// ```
70/// use parse_hyperlinks::parser::Link;
71/// use parse_hyperlinks::parser::restructured_text::rst_text_label2dest;
72/// use std::borrow::Cow;
73///
74/// assert_eq!(
75///   rst_text_label2dest("`name <destination>`_abc"),
76///   Ok(("abc", (Cow::from("name"), Cow::from("destination"), Cow::from(""))))
77/// );
78/// ```
79/// A hyperlink reference may directly embed a destination URI or (since Docutils
80/// 0.11) a hyperlink reference within angle brackets `<>` as shown in the
81/// following example:
82/// ```rst
83/// abc `Python home page <http://www.python.org>`_ abc
84/// ```
85/// The bracketed URI must be preceded by whitespace and be the last text
86/// before the end string.
87pub fn rst_text_label2dest(i: &str) -> nom::IResult<&str, (Cow<str>, Cow<str>, Cow<str>)> {
88    let (i, (ln, ld)) = rst_parse_text2target(false, false)(i)?;
89    let ln = rst_escaped_link_text_transform(ln)?.1;
90    let ld = rst_escaped_link_destination_transform(ld)?.1;
91
92    Ok((i, (ln, ld, Cow::Borrowed(""))))
93}
94
95/// This parser finds rst links of type:
96///     `*<*>`__
97/// or:
98///     `*<*>`_
99///
100/// Escape sequences are recognized and skipped, but not replaced here.
101/// If `anonym==true`: it recognizes:
102///     `*<*>`__
103/// otherwise:
104///     `*<*>`_
105///
106/// If `label==true` (`target==label`): it recognizes
107///     `*<*_>`_?
108/// otherwise (`target==dest`):
109///     `*<*>`_?
110fn rst_parse_text2target(
111    anonym: bool,
112    label: bool,
113) -> impl Fn(&str) -> IResult<&str, (&str, &str)> {
114    move |i: &str| {
115        let (mut i, inner) = nom::sequence::delimited(
116            tag("`"),
117            nom::bytes::complete::escaped(
118                nom::character::complete::none_of(r#"\`"#),
119                '\\',
120                nom::character::complete::one_of(ESCAPABLE),
121            ),
122            tag("`_"),
123        )(i)?;
124
125        if anonym {
126            let (j, _) = nom::character::complete::char('_')(i)?;
127            i = j;
128        };
129
130        // Assure that the next char is not`_`.
131        if !i.is_empty() {
132            let _ = nom::combinator::not(nom::character::complete::char('_'))(i)?;
133        };
134
135        // From here on, we only deal with the inner result of the above.
136        // Take everything until the first unescaped `<`
137        let (inner_rest, link_text): (&str, &str) = nom::bytes::complete::escaped(
138            nom::character::complete::none_of(r#"\<"#),
139            '\\',
140            nom::character::complete::one_of(ESCAPABLE),
141        )(inner)?;
142        // Trim trailing whitespace.
143        let link_text = link_text.trim_end();
144
145        let (j, mut link_dest_label) = nom::sequence::delimited(
146            tag("<"),
147            nom::bytes::complete::escaped(
148                nom::character::complete::none_of(r#"\<>"#),
149                '\\',
150                nom::character::complete::one_of(ESCAPABLE),
151            ),
152            tag(">"),
153        )(inner_rest)?;
154
155        // Fail if there are bytes left between `>` and `\``.
156        let (_, _) = nom::combinator::eof(j)?;
157
158        // Now check if `link_dest_label` is what we are expecting (which depends
159        // on `label`).
160
161        // Fail if `link_dest_label` is empty.
162        let (_, _) = nom::combinator::not(nom::combinator::eof)(link_dest_label)?;
163
164        // Get last char.
165        let last_char_is_ = link_dest_label.is_char_boundary(link_dest_label.len() - 1)
166            && &link_dest_label[link_dest_label.len() - 1..] == "_";
167        // If (`label==true`), we expect trailing `_`, fail otherwise.
168        // If (`label==false`), we fail when there is a trailing `_`.
169        if (label && !last_char_is_) || (!label && last_char_is_) {
170            return Err(nom::Err::Error(nom::error::Error::new(
171                i,
172                nom::error::ErrorKind::Tag,
173            )));
174        };
175        // When label, strip trailing `_`.
176        if label {
177            link_dest_label = &link_dest_label[..link_dest_label.len() - 1];
178        };
179
180        Ok((i, (link_text, link_dest_label)))
181    }
182}
183
184/// Wrapper around `rst_text2dest()` that packs the result in
185/// `Link::Text2Dest`.
186pub fn rst_text2label_link(i: &str) -> nom::IResult<&str, Link> {
187    let (i, (te, la)) = rst_text2label(i)?;
188    Ok((i, Link::Text2Label(te, la)))
189}
190
191/// Parse a RestructuredText _reference link_.
192///
193/// The caller must guarantee, that
194/// * the parser is at the input start (no bytes exist before).
195/// * the preceding bytes are whitespaces or newline, _or_
196/// * the preceding bytes are whitespaces or newline, followed by one of: `([<'"`
197/// ```rust
198/// use parse_hyperlinks::parser::Link;
199/// use parse_hyperlinks::parser::restructured_text::rst_text2label;
200/// use std::borrow::Cow;
201///
202/// assert_eq!(
203///   rst_text2label("linktext_ abc"),
204///   Ok((" abc", (Cow::from("linktext"), Cow::from("linktext"))))
205/// );
206/// assert_eq!(
207///   rst_text2label("`link text`_ abc"),
208///   Ok((" abc", (Cow::from("link text"), Cow::from("link text"))))
209/// );
210/// assert_eq!(
211///   rst_text2label("`link text<link label_>`_ abc"),
212///   Ok((" abc", (Cow::from("link text"), Cow::from("link label"))))
213/// );
214/// assert_eq!(
215///   rst_text2label("`link text`__ abc"),
216///   Ok((" abc", (Cow::from("link text"), Cow::from("_"))))
217/// );
218/// ```
219///
220pub fn rst_text2label(i: &str) -> nom::IResult<&str, (Cow<str>, Cow<str>)> {
221    let (i, (te, la)) = rst_parse_text2label(i)?;
222    let te = rst_escaped_link_text_transform(te)?.1;
223    let la = rst_escaped_link_text_transform(la)?.1;
224
225    Ok((i, (te, la)))
226}
227
228/// Parses a _reference link_. (Doctree element `reference`).
229///
230/// Named hyperlink references:
231/// No start-string, end-string = `_.
232/// Start-string = "`", end-string = `\`_`. (Phrase references.)
233/// Anonymous hyperlink references:
234/// No start-string, end-string = `__`.
235/// Start-string = "`", end-string = `\`__`. (Phrase references.)
236///
237///
238/// Hyperlink references are indicated by a trailing underscore, "_", except for
239/// standalone hyperlinks which are recognized independently.
240///
241/// Important: before this parser try `rst_text2dest()` first!
242///
243/// The caller must guarantee, that either:
244/// * we are at the input start -or-
245/// * the byte just before was a whitespace (including newline)!
246///
247/// For named references in reStructuredText `link_text` and `link_label`
248/// are the same. By convention we return for anonymous references:
249/// `link_label='_'`.
250///
251/// The parser checks that this _reference link_ is followed by a whitespace
252/// without consuming it.
253///
254fn rst_parse_text2label(i: &str) -> nom::IResult<&str, (&str, &str)> {
255    let (mut i, (link_text, mut link_label)) = alt((
256        rst_parse_text2target(false, true),
257        nom::combinator::map(rst_parse_simple_label, |s| (s, s)),
258    ))(i)?;
259
260    // Is this an anonymous reference? Consume the second `_` also.
261    if let (j, Some(_)) = nom::combinator::opt(nom::character::complete::char('_'))(i)? {
262        link_label = "_";
263        i = j;
264    };
265
266    Ok((i, (link_text, link_label)))
267}
268
269/// Wrapper around `rst_label2dest()` that packs the result in
270/// `Link::Label2Dest`.
271pub fn rst_label2dest_link(i: &str) -> nom::IResult<&str, Link> {
272    let (i, (l, d, t)) = rst_label2dest(i)?;
273    Ok((i, Link::Label2Dest(l, d, t)))
274}
275
276/// Parse a reStructuredText _link reference definition_.
277///
278/// This parser consumes until the end of the line. As rst does not know about link titles,
279/// the parser always returns an empty `link_title` as `Cow::Borrowed("")`.
280/// ```
281/// use parse_hyperlinks::parser::Link;
282/// use parse_hyperlinks::parser::restructured_text::rst_label2dest;
283/// use std::borrow::Cow;
284///
285/// assert_eq!(
286///   rst_label2dest("   .. _`label`: destination\nabc"),
287///   Ok(("\nabc", (Cow::from("label"), Cow::from("destination"), Cow::from(""))))
288/// );
289/// assert_eq!(
290///   rst_label2dest("   .. __: destination\nabc"),
291///   Ok(("\nabc", (Cow::from("_"), Cow::from("destination"), Cow::from(""))))
292/// );
293/// assert_eq!(
294///   rst_label2dest("   __ destination\nabc"),
295///   Ok(("\nabc", (Cow::from("_"), Cow::from("destination"), Cow::from(""))))
296/// );
297/// ```
298/// Here some examples for link references:
299/// ```rst
300/// .. _Python home page: http://www.python.org
301/// .. _`Python: home page`: http://www.python.org
302/// ```
303/// See unit test `test_rst_label2dest()` for more examples.
304pub fn rst_label2dest(i: &str) -> nom::IResult<&str, (Cow<str>, Cow<str>, Cow<str>)> {
305    let (i, (l, d)) = rst_label2target(false, i)?;
306    Ok((i, (l, d, Cow::from(""))))
307}
308
309/// Wrapper around `rst_label2label()` that packs the result in
310/// `Link::Label2Label`.
311pub fn rst_label2label_link(i: &str) -> nom::IResult<&str, Link> {
312    let (i, (l1, l2)) = rst_label2label(i)?;
313    Ok((i, Link::Label2Label(l1, l2)))
314}
315
316/// Parse a reStructuredText _link reference to link reference definition_.
317/// This type defines an alias (alternative name) for a link reference:
318/// ```
319/// use parse_hyperlinks::parser::Link;
320/// use parse_hyperlinks::parser::restructured_text::rst_label2label;
321/// use std::borrow::Cow;
322///
323/// assert_eq!(
324///   rst_label2label("   .. _`alt label`: `label`_\nabc"),
325///   Ok(("\nabc", (Cow::from("alt label"), Cow::from("label"))))
326/// );
327/// assert_eq!(
328///   rst_label2label("   .. __: label_\nabc"),
329///   Ok(("\nabc", (Cow::from("_"), Cow::from("label"))))
330/// );
331/// assert_eq!(
332///   rst_label2label("   __ label_\nabc"),
333///   Ok(("\nabc", (Cow::from("_"), Cow::from("label"))))
334/// );
335/// ```
336pub fn rst_label2label(i: &str) -> nom::IResult<&str, (Cow<str>, Cow<str>)> {
337    rst_label2target(true, i)
338}
339
340/// Parser for _link_reference_definitions_:
341/// * `label==false`:  the link is of type `Label2Dest`
342/// * `label==true`: the link is of type `Label2Label`
343fn rst_label2target(label: bool, i: &str) -> nom::IResult<&str, (Cow<str>, Cow<str>)> {
344    let my_err = |_| {
345        nom::Err::Error(nom::error::Error::new(
346            i,
347            nom::error::ErrorKind::EscapedTransform,
348        ))
349    };
350
351    // If there is a block start? What kind of?
352    let (i, c, block_header_is__) =
353        if let (i, Some(c)) = nom::combinator::opt(rst_explicit_markup_block(".. "))(i)? {
354            (i, c, false)
355        } else {
356            let (i, c) = rst_explicit_markup_block("__ ")(i)?;
357            (i, c, true)
358        };
359
360    let (source, target) = match c {
361        Cow::Borrowed(s) => {
362            let (_, (ls, lt)) = if !block_header_is__ {
363                rst_parse_label2target(label)(s)?
364            } else if label {
365                // This is supposed to be a label.
366                ("", ("_", rst_parse_simple_label(s)?.1))
367            } else {
368                // This is supposed to be a destination (url).
369                ("", ("_", s))
370            };
371            // If the target is a destination (not a label), the last char must not be `_`.
372            if !label {
373                let _ = nom::combinator::not(rst_parse_simple_label)(lt).map_err(my_err)?;
374            };
375            (
376                rst_escaped_link_text_transform(ls)?.1,
377                rst_escaped_link_destination_transform(lt)?.1,
378            )
379        }
380
381        Cow::Owned(strg) => {
382            let (_, (ls, lt)) = if !block_header_is__ {
383                rst_parse_label2target(label)(&strg).map_err(my_err)?
384            } else if label {
385                // This is supposed to be a label.
386                let s = rst_parse_simple_label(&strg).map_err(my_err)?.1;
387                ("", ("_", s))
388            } else {
389                // This is supposed to be a destination (url).
390                ("", ("_", strg.as_str()))
391            };
392            // If the target is a destination (not a label), the last char must not be `_`.
393            if !label {
394                let _ = nom::combinator::not(rst_parse_simple_label)(lt).map_err(my_err)?;
395            };
396            let ls = Cow::Owned(
397                rst_escaped_link_text_transform(ls)
398                    .map_err(my_err)?
399                    .1
400                    .to_string(),
401            );
402            let lt = Cow::Owned(
403                rst_escaped_link_destination_transform(lt)
404                    .map_err(my_err)?
405                    .1
406                    .to_string(),
407            );
408            (ls, lt)
409        }
410    };
411
412    // We do not need to consume whitespace until the end of the line,
413    // because `rst_explicit_markup_block()` had stripped the whitespace
414    // already.
415
416    Ok((i, (source, target)))
417}
418
419/// The parser recognizes `Label2Dest` links (`label==false`):
420///     _label: dest
421/// or `Label2Label` links (`label==true):
422///     _alt_label: label_
423/// It does not perform any escape character transformation.
424fn rst_parse_label2target(label: bool) -> impl Fn(&str) -> IResult<&str, (&str, &str)> {
425    move |i: &str| {
426        let (i, link_text) = alt((
427            nom::sequence::delimited(
428                tag("_`"),
429                nom::bytes::complete::escaped(
430                    nom::character::complete::none_of(r#"\`"#),
431                    '\\',
432                    nom::character::complete::one_of(ESCAPABLE),
433                ),
434                tag("`: "),
435            ),
436            nom::sequence::delimited(
437                tag("_"),
438                nom::bytes::complete::escaped(
439                    nom::character::complete::none_of(r#"\:"#),
440                    '\\',
441                    nom::character::complete::one_of(ESCAPABLE),
442                ),
443                tag(": "),
444            ),
445            nom::combinator::value("_", tag("__: ")),
446        ))(i)?;
447
448        let link_target = if label {
449            // The target is another label.
450            rst_parse_simple_label(i)?.1
451        } else {
452            // The target is a destination.
453            i
454        };
455
456        Ok(("", (link_text, link_target)))
457    }
458}
459
460/// This parser consumes a simple label:
461///     one_word_label_
462/// or
463///     `more words label`_
464fn rst_parse_simple_label(i: &str) -> nom::IResult<&str, &str> {
465    // Consumes and returns a word ending with `_`.
466    // Strips off one the trailing `_` before returning the result.
467    fn take_word_consume_first_ending_underscore(i: &str) -> nom::IResult<&str, &str> {
468        let mut i = i;
469        let (k, mut r) = nom::bytes::complete::take_till1(|c: char| {
470            !(c.is_alphanumeric() || c == '-' || c == '_')
471        })(i)?;
472        // Is `r` ending with `__`? There should be at least 2 bytes: `"__".len()`
473        if r.len() >= 3 && r.is_char_boundary(r.len() - 2) && &r[r.len() - 2..] == "__" {
474            // Consume one `_`, but keep one `_` in remaining bytes.
475            i = &i[r.len() - 1..];
476            // Strip two `__` from result.
477            r = &r[..r.len() - 2];
478        // Is `r` ending with `_`? There should be at least 1 byte: `"_".len()`.
479        } else if !r.is_empty() && r.is_char_boundary(r.len() - 1) && &r[r.len() - 1..] == "_" {
480            // Remaining bytes.
481            i = k;
482            // Strip `_` from result.
483            r = &r[..r.len() - 1]
484        } else {
485            return Err(nom::Err::Error(nom::error::Error::new(
486                k,
487                nom::error::ErrorKind::Tag,
488            )));
489        };
490
491        Ok((i, r))
492    }
493
494    let (i, r) = nom::combinator::verify(
495        alt((
496            nom::sequence::delimited(
497                tag("`"),
498                nom::bytes::complete::escaped(
499                    nom::character::complete::none_of(r#"\`"#),
500                    '\\',
501                    nom::character::complete::one_of(ESCAPABLE),
502                ),
503                tag("`_"),
504            ),
505            take_word_consume_first_ending_underscore,
506        )),
507        |s: &str| s.len() <= LABEL_LEN_MAX,
508    )(i)?;
509
510    // Return error if label is empty.
511    let _ = nom::combinator::not(alt((nom::combinator::eof, tag("``"))))(r)?;
512
513    Ok((i, r))
514}
515
516/// This parses an explicit markup block.
517/// The parser expects to start at the beginning of the line.
518/// Syntax diagram:
519/// ```text
520/// +-------+----------------------+
521/// | ".. " | in  1                |
522/// +-------+ in  2                |
523///         |    in  3             |
524///         +----------------------+
525/// out
526/// ```
527/// An explicit markup block is a text block:
528/// * whose first line begins with ".." followed by whitespace (the "explicit
529///   markup start"),
530/// * whose second and subsequent lines (if any) are indented relative to the
531///   first, and
532/// * which ends before an unintended line.
533///
534/// As with external hyperlink targets, the link block of an indirect
535/// hyperlink target may begin on the same line as the explicit markup start
536/// or the next line. It may also be split over multiple lines, in which case
537/// the lines are joined with whitespace before being normalized.
538fn rst_explicit_markup_block<'a>(
539    block_header: &'a str,
540) -> impl Fn(&'a str) -> IResult<&'a str, Cow<'a, str>> {
541    move |i: &'a str| {
542        fn indent<'a>(wsp1: &'a str, wsp2: &'a str) -> impl Fn(&'a str) -> IResult<&'a str, ()> {
543            move |i: &str| {
544                let (i, _) = nom::character::complete::line_ending(i)?;
545                let (i, _) = nom::bytes::complete::tag(wsp1)(i)?;
546                let (i, _) = nom::bytes::complete::tag(wsp2)(i)?;
547                Ok((i, ()))
548            }
549        }
550
551        let (i, (wsp1, wsp2)) = nom::sequence::pair(
552            nom::character::complete::space0,
553            nom::combinator::map(nom::bytes::complete::tag(block_header), |_| "   "),
554        )(i)?;
555
556        let (j, v) = nom::multi::separated_list1(
557            indent(wsp1, wsp2),
558            nom::character::complete::not_line_ending,
559        )(i)?;
560
561        // If the block consists of only one line return now.
562        if v.len() == 1 {
563            return Ok((j, Cow::Borrowed(v[0])));
564        };
565
566        let mut s = String::new();
567        let mut is_first = true;
568
569        for subs in &v {
570            if !is_first {
571                s.push(' ');
572            }
573            s.push_str(subs);
574            is_first = false;
575        }
576
577        Ok((j, Cow::from(s)))
578    }
579}
580
581/// Replace the following escaped characters:
582///     \\\`\ \:\<\>
583/// with:
584///     \`:<>
585/// Preserves usual whitespace, but removes `\ `.
586fn rst_escaped_link_text_transform(i: &str) -> IResult<&str, Cow<str>> {
587    nom::combinator::map(
588        nom::bytes::complete::escaped_transform(
589            nom::bytes::complete::is_not("\\"),
590            '\\',
591            // This list is the same as `ESCAPABLE`.
592            alt((
593                tag("\\"),
594                tag("`"),
595                tag(":"),
596                tag("<"),
597                tag(">"),
598                tag("_"),
599                value("", tag(" ")),
600            )),
601        ),
602        |s| if s == i { Cow::from(i) } else { Cow::from(s) },
603    )(i)
604}
605
606/// Deletes all whitespace, but keeps one space for each `\ `.
607fn remove_whitespace(i: &str) -> IResult<&str, Cow<str>> {
608    let mut res = Cow::Borrowed("");
609    let mut j = i;
610    while !j.is_empty() {
611        let (k, _) = nom::character::complete::multispace0(j)?;
612        let (k, s) = nom::bytes::complete::escaped(
613            nom::character::complete::none_of("\\\r\n \t"),
614            '\\',
615            nom::character::complete::one_of(r#" :`<>\"#),
616        )(k)?;
617        res = match res {
618            Cow::Borrowed("") => Cow::Borrowed(s),
619            Cow::Borrowed(res_str) => {
620                let mut strg = res_str.to_string();
621                strg.push_str(s);
622                Cow::Owned(strg)
623            }
624            Cow::Owned(mut strg) => {
625                strg.push_str(s);
626                Cow::Owned(strg)
627            }
628        };
629        j = k;
630    }
631
632    Ok((j, res))
633}
634
635/// Replace the following escaped characters:
636///     \\\`\ \:\<\>
637/// with:
638///     \` :<>
639fn rst_escaped_link_destination_transform(i: &str) -> IResult<&str, Cow<str>> {
640    let my_err = |_| {
641        nom::Err::Error(nom::error::Error::new(
642            i,
643            nom::error::ErrorKind::EscapedTransform,
644        ))
645    };
646
647    let c = &*remove_whitespace(i)?.1;
648
649    let s = nom::bytes::complete::escaped_transform::<_, nom::error::Error<_>, _, _, _, _, _, _>(
650        nom::bytes::complete::is_not("\\"),
651        '\\',
652        nom::character::complete::one_of(ESCAPABLE),
653    )(c)
654    .map_err(my_err)?
655    .1;
656
657    // When nothing was changed we can continue with `Borrowed`.
658    if s == i {
659        Ok(("", Cow::Borrowed(i)))
660    } else {
661        Ok(("", Cow::Owned(s)))
662    }
663}
664
665#[cfg(test)]
666mod tests {
667    use super::*;
668    use nom::error::ErrorKind;
669
670    #[test]
671    fn test_rst_text2dest() {
672        let expected = (
673            "abc",
674            (
675                Cow::from("Python home page"),
676                Cow::from("http://www.python.org"),
677                Cow::from(""),
678            ),
679        );
680        assert_eq!(
681            rst_text2dest("`Python home page <http://www.python.org>`__abc").unwrap(),
682            expected
683        );
684
685        let expected = (
686            "abc",
687            (
688                Cow::from(r#"Python<home> page"#),
689                Cow::from("http://www.python.org"),
690                Cow::from(""),
691            ),
692        );
693        assert_eq!(
694            rst_text2dest(r#"`Python\ \<home\> page <http://www.python.org>`__abc"#).unwrap(),
695            expected
696        );
697
698        let expected = (
699            "abc",
700            (
701                Cow::from(r#"my news at <http://python.org>"#),
702                Cow::from("http://news.python.org"),
703                Cow::from(""),
704            ),
705        );
706        assert_eq!(
707            rst_text2dest(r#"`my news at \<http://python.org\> <http://news.python.org>`__abc"#)
708                .unwrap(),
709            expected
710        );
711
712        let expected = (
713            "abc",
714            (
715                Cow::from(r#"my news at <http://python.org>"#),
716                Cow::from(r#"http://news. <python>.org"#),
717                Cow::from(""),
718            ),
719        );
720        assert_eq!(
721            rst_text2dest(
722                r#"`my news at \<http\://python.org\> <http:// news.\ \<python\>.org>`__abc"#
723            )
724            .unwrap(),
725            expected
726        );
727    }
728
729    #[test]
730    fn test_rst_parse_text2dest_label() {
731        let expected = ("abc", ("Python home page", "http://www.python.org"));
732        assert_eq!(
733            rst_parse_text2target(false, false)("`Python home page <http://www.python.org>`_abc")
734                .unwrap(),
735            expected
736        );
737
738        let expected = nom::Err::Error(nom::error::Error::new("abc", ErrorKind::Tag));
739        assert_eq!(
740            rst_parse_text2target(false, false)("`Python home page <http://www.python.org_>`_abc")
741                .unwrap_err(),
742            expected
743        );
744
745        let expected = nom::Err::Error(nom::error::Error::new("", ErrorKind::Tag));
746        assert_eq!(
747            rst_parse_text2target(false, false)("`_abc").unwrap_err(),
748            expected
749        );
750
751        let expected = ("abc", ("Python home page", "http://www.python.org"));
752        assert_eq!(
753            rst_parse_text2target(true, false)("`Python home page <http://www.python.org>`__abc")
754                .unwrap(),
755            expected
756        );
757
758        let expected = ("abc", (r#"Python\ \<home\> page"#, "http://www.python.org"));
759        assert_eq!(
760            rst_parse_text2target(false, false)(
761                r#"`Python\ \<home\> page <http://www.python.org>`_abc"#
762            )
763            .unwrap(),
764            expected
765        );
766
767        let expected = (
768            "abc",
769            (
770                r#"my news at \<http://python.org\>"#,
771                "http://news.python.org",
772            ),
773        );
774        assert_eq!(
775            rst_parse_text2target(false, false)(
776                r#"`my news at \<http://python.org\> <http://news.python.org>`_abc"#
777            )
778            .unwrap(),
779            expected
780        );
781
782        let expected = (
783            "abc",
784            (
785                r#"my news at \<http\://python.org\>"#,
786                r#"http:// news.\ \<python\>.org"#,
787            ),
788        );
789        assert_eq!(
790            rst_parse_text2target(false, false)(
791                r#"`my news at \<http\://python.org\> <http:// news.\ \<python\>.org>`_abc"#
792            )
793            .unwrap(),
794            expected
795        );
796
797        let expected = (
798            "abc",
799            (
800                r#"my news at \<http\://python.org\>"#,
801                r#"http:// news.\ \<python\>.org"#,
802            ),
803        );
804        assert_eq!(
805            rst_parse_text2target(false, false)(
806                r#"`my news at \<http\://python.org\> <http:// news.\ \<python\>.org>`_abc"#
807            )
808            .unwrap(),
809            expected
810        );
811        let expected = ("abc", (r#"rst link text"#, "rst_link_label"));
812        assert_eq!(
813            rst_parse_text2target(false, true)(r#"`rst link text <rst_link_label_>`_abc"#).unwrap(),
814            expected
815        );
816
817        let expected = nom::Err::Error(nom::error::Error::new("abc", ErrorKind::Tag));
818        assert_eq!(
819            rst_parse_text2target(false, true)(r#"`my news <python webpage>`_abc"#).unwrap_err(),
820            expected
821        );
822    }
823
824    #[test]
825    fn test_rst_text2label() {
826        assert_eq!(
827            rst_text2label(r#"link_text_ abc"#),
828            Ok((" abc", (Cow::from("link_text"), Cow::from("link_text"))))
829        );
830        assert_eq!(
831            rst_text2label(r#"`li\:nk text`_ abc"#),
832            Ok((" abc", (Cow::from("li:nk text"), Cow::from("li:nk text"))))
833        );
834        assert_eq!(
835            rst_text2label("`link text`__ abc"),
836            Ok((" abc", (Cow::from("link text"), Cow::from("_"))))
837        );
838    }
839
840    #[test]
841    fn test_rst_parse_text2label() {
842        assert_eq!(
843            rst_parse_text2label("linktext_ abc"),
844            Ok((" abc", ("linktext", "linktext")))
845        );
846
847        assert_eq!(
848            rst_parse_text2label("linktext__ abc"),
849            Ok((" abc", ("linktext", "_")))
850        );
851
852        assert_eq!(
853            rst_parse_text2label("link_text_ abc"),
854            Ok((" abc", ("link_text", "link_text")))
855        );
856
857        assert_eq!(
858            rst_parse_text2label("`link text`_ abc"),
859            Ok((" abc", ("link text", "link text")))
860        );
861
862        assert_eq!(
863            rst_parse_text2label("`link text`_abc"),
864            Ok(("abc", ("link text", "link text")))
865        );
866
867        assert_eq!(
868            rst_parse_text2label("`link_text`_ abc"),
869            Ok((" abc", ("link_text", "link_text")))
870        );
871
872        assert_eq!(
873            rst_parse_text2label("`link text`__ abc"),
874            Ok((" abc", ("link text", "_")))
875        );
876
877        assert_eq!(
878            rst_parse_text2label("`link text<link label_>`_ abc"),
879            Ok((" abc", ("link text", "link label")))
880        );
881    }
882
/// Checks `rst_label2dest()`: inputs that must all yield the same
/// label/destination pair are grouped together, and inputs that must be
/// rejected are compared against the exact `nom` error.
#[test]
fn test_rst_label2dest() {
    // Builds the expected `(remaining input, (label, destination, title))`
    // value; every expectation in this test carries an empty title.
    fn outcome<'a>(
        rest: &'a str,
        label: &'a str,
        dest: &'a str,
    ) -> (&'a str, (Cow<'a, str>, Cow<'a, str>, Cow<'a, str>)) {
        (rest, (Cow::from(label), Cow::from(dest), Cow::from("")))
    }

    // Single-line form, and an indented form whose destination continues
    // on the next line and is followed by trailing spaces.
    let want = outcome("\nabc", "Python: home page", "http://www.python.org");
    let inputs = [
        ".. _`Python: home page`: http://www.python.org\nabc",
        "  .. _`Python: home page`: http://www.py\n     thon.org    \nabc",
    ];
    for &input in inputs.iter() {
        assert_eq!(rst_label2dest(input).unwrap(), want);
    }

    // Text in front of the `..` marker is rejected with a `Tag` error that
    // points at the whole input.
    let input = "x .. _`Python: home page`: http://www.python.org\nabc";
    assert_eq!(
        rst_label2dest(input).unwrap_err(),
        nom::Err::Error(nom::error::Error::new(input, ErrorKind::Tag))
    );

    // Escaped colon, backquotes and space.
    let want = outcome("", "Python: `home page`", "http://www.python .org");
    let inputs = [
        r#".. _Python\: \`home page\`: http://www.python\ .org"#,
        r#".. _`Python: \`home page\``: http://www.python\ .org"#,
    ];
    for &input in inputs.iter() {
        assert_eq!(rst_label2dest(input).unwrap(), want);
    }

    // Angle brackets inside the label, escaped and unescaped.
    let want = outcome(
        "",
        "my news at <http://python.org>",
        "http://news.python.org",
    );
    let inputs = [
        r#".. _`my news at <http://python.org>`: http://news.python.org"#,
        r#".. _`my news at \<http://python.org\>`: http://news.python.org"#,
        r#".. _my news at \<http\://python.org\>: http://news.python.org"#,
    ];
    for &input in inputs.iter() {
        assert_eq!(rst_label2dest(input).unwrap(), want);
    }

    // Angle brackets inside the destination, escaped and unescaped.
    let want = outcome("", "my news", "http://news.<python>.org");
    let inputs = [
        r#".. _my news: http://news.<python>.org"#,
        r#".. _my news: http://news.\<python\>.org"#,
    ];
    for &input in inputs.iter() {
        assert_eq!(rst_label2dest(input).unwrap(), want);
    }

    // Both `.. __:` and the short `__ ` form produce the label `_`.
    let want = outcome("", "_", "http://news.python.org");
    let inputs = [
        r#".. __: http://news.python.org"#,
        r#"__ http://news.python.org"#,
    ];
    for &input in inputs.iter() {
        assert_eq!(rst_label2dest(input).unwrap(), want);
    }

    // A target written as a reference (trailing `_`) is not a destination:
    // both forms fail with `EscapedTransform` pointing at the whole input.
    let inputs = [".. _label: `link destination`_", "__ link_destination_"];
    for &input in inputs.iter() {
        assert_eq!(
            rst_label2dest(input).unwrap_err(),
            nom::Err::Error(nom::error::Error::new(
                input,
                ErrorKind::EscapedTransform
            )),
        );
    }
}
1001
/// Checks `rst_label2label()`: three inputs that map a label onto the
/// label `label`, and two inputs that must fail with a `Tag` error.
#[test]
fn test_rst_label2label() {
    // Columns: input, expected first label. The second label is always
    // "label" and the remainder is always "\nabc".
    let accepted = [
        ("   .. _`alt label`: `label`_\nabc", "alt label"),
        ("   .. __: label_\nabc", "_"),
        ("   __ label_\nabc", "_"),
    ];
    for &(input, first) in accepted.iter() {
        assert_eq!(
            rst_label2label(input),
            Ok(("\nabc", (Cow::from(first), Cow::from("label"))))
        );
    }

    // Columns: input, input position reported in the error.
    let rejected = [
        // No `.. ` marker in front of the label.
        ("_label: label", "_label: label"),
        // The target has no trailing `_`; the error is reported at the end.
        ("__ destination", ""),
    ];
    for &(input, at) in rejected.iter() {
        assert_eq!(
            rst_label2label(input).unwrap_err(),
            nom::Err::Error(nom::error::Error::new(at, ErrorKind::Tag)),
        );
    }
}
1025
/// Checks the raw output of `rst_parse_label2target()`. Escape sequences
/// are expected to come back untouched, and all input is consumed in
/// every case.
#[test]
fn test_rst_parse_label2target() {
    // Columns: boolean argument, input, expected label, expected target.
    // The argument is `true` only in the rows whose target is itself
    // written as a reference with a trailing `_`.
    let cases = [
        (
            false,
            "_Python home page: http://www.python.org",
            "Python home page",
            "http://www.python.org",
        ),
        (
            false,
            "_`Python home page`: http://www.python.org",
            "Python home page",
            "http://www.python.org",
        ),
        (
            false,
            "_`Python: home page`: http://www.python.org",
            "Python: home page",
            "http://www.python.org",
        ),
        (
            false,
            r#"_Python\: home page: http://www.python.org"#,
            r#"Python\: home page"#,
            "http://www.python.org",
        ),
        (
            false,
            r#"_`my news at <http://python.org>`: http://news.python.org"#,
            "my news at <http://python.org>",
            "http://news.python.org",
        ),
        (
            false,
            r#"_`my news at \<http://python.org\>`: http://news.python.org"#,
            r#"my news at \<http://python.org\>"#,
            "http://news.python.org",
        ),
        (
            false,
            r#"_my news at \<http\://python.org\>: http://news.python.org"#,
            r#"my news at \<http\://python.org\>"#,
            "http://news.python.org",
        ),
        (
            false,
            r#"__: http://news.python.org"#,
            "_",
            "http://news.python.org",
        ),
        (
            true,
            "_alt_label: one_word_label_",
            "alt_label",
            "one_word_label",
        ),
        (
            true,
            "_`alt label`: `more words label`_",
            "alt label",
            "more words label",
        ),
    ];

    for &(flag, input, label, target) in cases.iter() {
        let parsed = rst_parse_label2target(flag)(input).unwrap();
        assert_eq!(parsed, ("", (label, target)));
    }
}
1110
/// Checks `rst_parse_simple_label()` on one-word and backquoted labels
/// followed by nothing, whitespace or punctuation, plus two inputs
/// without any label text that must be rejected.
#[test]
fn test_parse_simple_label() {
    // Columns: input, expected remainder, expected label.
    let accepted = [
        ("one_word_label_", "", "one_word_label"),
        ("one_word_label_ abc", " abc", "one_word_label"),
        ("`one_word_label`_ abc", " abc", "one_word_label"),
        ("`more words label`_", "", "more words label"),
        ("`more words label`_. abc", ". abc", "more words label"),
        ("`more words label`_? abc", "? abc", "more words label"),
        ("`more words label`_ abc", " abc", "more words label"),
    ];
    for &(input, rest, label) in accepted.iter() {
        assert_eq!(rst_parse_simple_label(input).unwrap(), (rest, label));
    }

    // A lone underscore: the error is reported at the end of the input.
    let lone_underscore = rst_parse_simple_label("_").unwrap_err();
    assert_eq!(
        lone_underscore,
        nom::Err::Error(nom::error::Error::new("", ErrorKind::Not)),
    );

    // Empty backquotes: the error is reported at the start of the input.
    let empty_quotes = rst_parse_simple_label("``_").unwrap_err();
    assert_eq!(
        empty_quotes,
        nom::Err::Error(nom::error::Error::new("``_", ErrorKind::TakeTill1)),
    );
}
1160
/// Checks `rst_explicit_markup_block(".. ")`: single-line and multi-line
/// blocks are expected to be joined into one string, and inputs that do
/// not start with the `.. ` marker must be rejected.
#[test]
fn test_rst_explicit_markup_block() {
    // Columns: input, expected remainder, expected block content.
    let accepted = [
        (".. 11111", "", "11111"),
        ("   .. 11111\nout", "\nout", "11111"),
        (
            "   .. 11111\n      222222\n      333333\nout",
            "\nout",
            "11111 222222 333333",
        ),
        // One extra column of indentation is expected to stay in the output.
        (
            "   .. first\n      second\n       1indent\nout",
            "\nout",
            "first second  1indent",
        ),
    ];
    for &(input, rest, block) in accepted.iter() {
        assert_eq!(
            rst_explicit_markup_block(".. ")(input),
            Ok((rest, Cow::from(block)))
        );
    }

    // Columns: input, input position reported in the `Tag` error.
    let rejected = [
        // No space after the two dots.
        ("   ..first", "..first"),
        // Non-whitespace text in front of the marker.
        ("x  .. first", "x  .. first"),
    ];
    for &(input, at) in rejected.iter() {
        assert_eq!(
            rst_explicit_markup_block(".. ")(input),
            Err(nom::Err::Error(nom::error::Error::new(
                at,
                ErrorKind::Tag
            )))
        );
    }
}
1194
/// Checks `rst_escaped_link_text_transform()`: all input is consumed and
/// the transformed text matches the expectation in each row.
#[test]
fn test_rst_escaped_link_text_transform() {
    // Columns: input, expected transformed text.
    let cases = [
        ("", ""),
        // Plain spaces are kept; the link destination version differs here.
        ("   ", "   "),
        // Escaped spaces vanish; the link destination version differs here.
        (r#"\ \ \ "#, ""),
        (r#"abc`:<>abc"#, r#"abc`:<>abc"#),
        (r#"\:\`\<\>\\"#, r#":`<>\"#),
    ];

    for &(input, want) in cases.iter() {
        let transformed = rst_escaped_link_text_transform(input);
        assert_eq!(transformed, Ok(("", Cow::from(want))));
    }
}
1217
/// Checks `rst_escaped_link_destination_transform()`: all input is
/// consumed and the transformed destination matches the expectation in
/// each row.
#[test]
fn test_rst_escaped_link_destination_transform() {
    // Columns: input, expected transformed destination.
    let cases = vec![
        ("", Cow::Borrowed("")),
        // Plain spaces are dropped; the link name version differs here.
        ("  ", Cow::Borrowed("")),
        (" x x", Cow::Owned("xx".to_string())),
        // Escaped spaces are kept; the link name version differs here.
        (r#"\ \ \ "#, Cow::Owned("   ".to_string())),
        (r#"abc`:<>abc"#, Cow::Borrowed(r#"abc`:<>abc"#)),
        (r#"\:\`\<\>\\"#, Cow::Owned(r#":`<>\"#.to_string())),
    ];

    for (input, want) in cases {
        let transformed = rst_escaped_link_destination_transform(input);
        assert_eq!(transformed, Ok(("", want)));
    }
}
/// Checks `remove_whitespace()`: spaces, tabs, carriage returns and
/// newlines are expected to disappear, while backslash escape sequences
/// are expected to come back unchanged.
#[test]
fn test_remove_whitespace() {
    // Columns: input, expected output. All input is consumed in every row.
    let cases = vec![
        (" abc ", Cow::Borrowed("abc")),
        (" x x", Cow::Owned("xx".to_string())),
        ("  \t \r \n", Cow::from("")),
        (r#"\ \ \ "#, Cow::Borrowed(r#"\ \ \ "#)),
        (r#"abc`:<>abc"#, Cow::Borrowed(r#"abc`:<>abc"#)),
        (r#"\:\`\<\>\\"#, Cow::Borrowed(r#"\:\`\<\>\\"#)),
        // A destination wrapped onto an indented continuation line.
        (
            "http://www.py\n     thon.org",
            Cow::Owned("http://www.python.org".to_string()),
        ),
    ];

    for (input, want) in cases {
        assert_eq!(remove_whitespace(input), Ok(("", want)));
    }
}
1273}