parse_hyperlinks/parser/
restructured_text.rs

1//! This module implements parsers for RestructuredText hyperlinks.
2#![allow(dead_code)]
3#![allow(clippy::type_complexity)]
4
5use crate::parser::parse::LABEL_LEN_MAX;
6use crate::parser::Link;
7use nom::branch::alt;
8use nom::bytes::complete::tag;
9use nom::combinator::*;
10use nom::IResult;
11use std::borrow::Cow;
12
/// Characters that can be escaped with `\`.
///
/// Note: If you ever change this, also change
/// `rst_escaped_link_text_transform()`.
17const ESCAPABLE: &str = r#" `:<>_\"#;
18
19/// Wrapper around `rst_text2dest()` that packs the result in
20/// `Link::Text2Dest`.
21pub fn rst_text2dest_link(i: &str) -> nom::IResult<&str, Link> {
22    let (i, (te, de, ti)) = rst_text2dest(i)?;
23    Ok((i, Link::Text2Dest(te, de, ti)))
24}
25
26/// Parse a RestructuredText _inline hyperlink_.
27///
28/// The parser expects to start at the link start (\`) to succeed.
29/// As rst does not know about link titles,
30/// the parser always returns an empty `link_title` as `Cow::Borrowed("")`
31/// ```
32/// use parse_hyperlinks::parser::Link;
33/// use parse_hyperlinks::parser::restructured_text::rst_text2dest;
34/// use std::borrow::Cow;
35///
36/// assert_eq!(
37///   rst_text2dest("`name <destination>`__abc"),
38///   Ok(("abc", (Cow::from("name"), Cow::from("destination"), Cow::from(""))))
39/// );
40/// ```
41/// A hyperlink reference may directly embed a destination URI or (since Docutils
42/// 0.11) a hyperlink reference within angle brackets `<>` as shown in the
43/// following example:
44/// ```rst
45/// abc `Python home page <http://www.python.org>`__ abc
46/// ```
47/// The bracketed URI must be preceded by whitespace and be the last text
48/// before the end string.
49pub fn rst_text2dest(i: &str) -> nom::IResult<&str, (Cow<str>, Cow<str>, Cow<str>)> {
50    let (i, (ln, ld)) = rst_parse_text2target(true, false)(i)?;
51    let ln = rst_escaped_link_text_transform(ln)?.1;
52    let ld = rst_escaped_link_destination_transform(ld)?.1;
53
54    Ok((i, (ln, ld, Cow::Borrowed(""))))
55}
56
57/// Wrapper around `rst_textlabel2dest()` that packs the result in
58/// `Link::TextLabel2Dest`.
59pub fn rst_text_label2dest_link(i: &str) -> nom::IResult<&str, Link> {
60    let (i, (te, de, ti)) = rst_text_label2dest(i)?;
61    Ok((i, Link::TextLabel2Dest(te, de, ti)))
62}
63
64/// Parse a RestructuredText combined _inline hyperlink_ with _link reference definition_.
65///
66/// The parser expects to start at the link start (\`) to succeed.
67/// As rst does not know about link titles,
68/// the parser always returns an empty `link_title` as `Cow::Borrowed("")`.
69/// ```
70/// use parse_hyperlinks::parser::Link;
71/// use parse_hyperlinks::parser::restructured_text::rst_text_label2dest;
72/// use std::borrow::Cow;
73///
74/// assert_eq!(
75///   rst_text_label2dest("`name <destination>`_abc"),
76///   Ok(("abc", (Cow::from("name"), Cow::from("destination"), Cow::from(""))))
77/// );
78/// ```
79/// A hyperlink reference may directly embed a destination URI or (since Docutils
80/// 0.11) a hyperlink reference within angle brackets `<>` as shown in the
81/// following example:
82/// ```rst
83/// abc `Python home page <http://www.python.org>`_ abc
84/// ```
85/// The bracketed URI must be preceded by whitespace and be the last text
86/// before the end string.
87pub fn rst_text_label2dest(i: &str) -> nom::IResult<&str, (Cow<str>, Cow<str>, Cow<str>)> {
88    let (i, (ln, ld)) = rst_parse_text2target(false, false)(i)?;
89    let ln = rst_escaped_link_text_transform(ln)?.1;
90    let ld = rst_escaped_link_destination_transform(ld)?.1;
91
92    Ok((i, (ln, ld, Cow::Borrowed(""))))
93}
94
95/// This parser finds rst links of type:
96///     `*<*>`__
97/// or:
98///     `*<*>`_
99///
100/// Escape sequences are recognized and skipped, but not replaced here.
101/// If `anonym==true`: it recognizes:
102///     `*<*>`__
103/// otherwise:
104///     `*<*>`_
105///
106/// If `label==true` (`target==label`): it recognizes
107///     `*<*_>`_?
108/// otherwise (`target==dest`):
109///     `*<*>`_?
110fn rst_parse_text2target(
111    anonym: bool,
112    label: bool,
113) -> impl Fn(&str) -> IResult<&str, (&str, &str)> {
114    move |i: &str| {
115        let (mut i, inner) = nom::sequence::delimited(
116            tag("`"),
117            nom::bytes::complete::escaped(
118                nom::character::complete::none_of(r#"\`"#),
119                '\\',
120                nom::character::complete::one_of(ESCAPABLE),
121            ),
122            tag("`_"),
123        )(i)?;
124
125        if anonym {
126            let (j, _) = nom::character::complete::char('_')(i)?;
127            i = j;
128        };
129
130        // Assure that the next char is not`_`.
131        if !i.is_empty() {
132            let _ = nom::combinator::not(nom::character::complete::char('_'))(i)?;
133        };
134
135        // From here on, we only deal with the inner result of the above.
136        // Take everything until the first unescaped `<`
137        let (inner_rest, link_text): (&str, &str) = nom::bytes::complete::escaped(
138            nom::character::complete::none_of(r#"\<"#),
139            '\\',
140            nom::character::complete::one_of(ESCAPABLE),
141        )(inner)?;
142        // Trim trailing whitespace.
143        let link_text = link_text.trim_end();
144
145        let (j, mut link_dest_label) = nom::sequence::delimited(
146            tag("<"),
147            nom::bytes::complete::escaped(
148                nom::character::complete::none_of(r#"\<>"#),
149                '\\',
150                nom::character::complete::one_of(ESCAPABLE),
151            ),
152            tag(">"),
153        )(inner_rest)?;
154
155        // Fail if there are bytes left between `>` and `\``.
156        let (_, _) = nom::combinator::eof(j)?;
157
158        // Now check if `link_dest_label` is what we are expecting (which depends
159        // on `label`).
160
161        // Fail if `link_dest_label` is empty.
162        let (_, _) = nom::combinator::not(nom::combinator::eof)(link_dest_label)?;
163
164        // Get last char.
165        let last_char_is_ = link_dest_label.is_char_boundary(link_dest_label.len() - 1)
166            && &link_dest_label[link_dest_label.len() - 1..] == "_";
167        // If (`label==true`), we expect trailing `_`, fail otherwise.
168        // If (`label==false`), we fail when there is a trailing `_`.
169        if (label && !last_char_is_) || (!label && last_char_is_) {
170            return Err(nom::Err::Error(nom::error::Error::new(
171                i,
172                nom::error::ErrorKind::Tag,
173            )));
174        };
175        // When label, strip trailing `_`.
176        if label {
177            link_dest_label = &link_dest_label[..link_dest_label.len() - 1];
178        };
179
180        Ok((i, (link_text, link_dest_label)))
181    }
182}
183
184/// Wrapper around `rst_text2dest()` that packs the result in
185/// `Link::Text2Dest`.
186pub fn rst_text2label_link(i: &str) -> nom::IResult<&str, Link> {
187    let (i, (te, la)) = rst_text2label(i)?;
188    Ok((i, Link::Text2Label(te, la)))
189}
190
191/// Parse a RestructuredText _reference link_.
192///
193/// The caller must guarantee, that
194/// * the parser is at the input start (no bytes exist before).
195/// * the preceding bytes are whitespaces or newline, _or_
196/// * the preceding bytes are whitespaces or newline, followed by one of: `([<'"`
197/// ```rust
198/// use parse_hyperlinks::parser::Link;
199/// use parse_hyperlinks::parser::restructured_text::rst_text2label;
200/// use std::borrow::Cow;
201///
202/// assert_eq!(
203///   rst_text2label("linktext_ abc"),
204///   Ok((" abc", (Cow::from("linktext"), Cow::from("linktext"))))
205/// );
206/// assert_eq!(
207///   rst_text2label("`link text`_ abc"),
208///   Ok((" abc", (Cow::from("link text"), Cow::from("link text"))))
209/// );
210/// assert_eq!(
211///   rst_text2label("`link text<link label_>`_ abc"),
212///   Ok((" abc", (Cow::from("link text"), Cow::from("link label"))))
213/// );
214/// assert_eq!(
215///   rst_text2label("`link text`__ abc"),
216///   Ok((" abc", (Cow::from("link text"), Cow::from("_"))))
217/// );
218/// ```
219///
220pub fn rst_text2label(i: &str) -> nom::IResult<&str, (Cow<str>, Cow<str>)> {
221    let (i, (te, la)) = rst_parse_text2label(i)?;
222    let te = rst_escaped_link_text_transform(te)?.1;
223    let la = rst_escaped_link_text_transform(la)?.1;
224
225    Ok((i, (te, la)))
226}
227
228/// Parses a _reference link_. (Doctree element `reference`).
229///
230/// Named hyperlink references:
231/// No start-string, end-string = `_.
232/// Start-string = "`", end-string = `\`_`. (Phrase references.)
233/// Anonymous hyperlink references:
234/// No start-string, end-string = `__`.
235/// Start-string = "`", end-string = `\`__`. (Phrase references.)
236///
237///
238/// Hyperlink references are indicated by a trailing underscore, "_", except for
239/// standalone hyperlinks which are recognized independently.
240///
241/// Important: before this parser try `rst_text2dest()` first!
242///
243/// The caller must guarantee, that either:
244/// * we are at the input start -or-
245/// * the byte just before was a whitespace (including newline)!
246///
247/// For named references in reStructuredText `link_text` and `link_label`
248/// are the same. By convention we return for anonymous references:
249/// `link_label='_'`.
250///
251/// The parser checks that this _reference link_ is followed by a whitespace
252/// without consuming it.
253///
254fn rst_parse_text2label(i: &str) -> nom::IResult<&str, (&str, &str)> {
255    let (mut i, (link_text, mut link_label)) = alt((
256        rst_parse_text2target(false, true),
257        nom::combinator::map(rst_parse_simple_label, |s| (s, s)),
258    ))(i)?;
259
260    // Is this an anonymous reference? Consume the second `_` also.
261    if let (j, Some(_)) = nom::combinator::opt(nom::character::complete::char('_'))(i)? {
262        link_label = "_";
263        i = j;
264    };
265
266    Ok((i, (link_text, link_label)))
267}
268
269/// Wrapper around `rst_label2dest()` that packs the result in
270/// `Link::Label2Dest`.
271pub fn rst_label2dest_link(i: &str) -> nom::IResult<&str, Link> {
272    let (i, (l, d, t)) = rst_label2dest(i)?;
273    Ok((i, Link::Label2Dest(l, d, t)))
274}
275
276/// Parse a reStructuredText _link reference definition_.
277///
278/// This parser consumes until the end of the line. As rst does not know about link titles,
279/// the parser always returns an empty `link_title` as `Cow::Borrowed("")`.
280/// ```
281/// use parse_hyperlinks::parser::Link;
282/// use parse_hyperlinks::parser::restructured_text::rst_label2dest;
283/// use std::borrow::Cow;
284///
285/// assert_eq!(
286///   rst_label2dest("   .. _`label`: destination\nabc"),
287///   Ok(("\nabc", (Cow::from("label"), Cow::from("destination"), Cow::from(""))))
288/// );
289/// assert_eq!(
290///   rst_label2dest("   .. __: destination\nabc"),
291///   Ok(("\nabc", (Cow::from("_"), Cow::from("destination"), Cow::from(""))))
292/// );
293/// assert_eq!(
294///   rst_label2dest("   __ destination\nabc"),
295///   Ok(("\nabc", (Cow::from("_"), Cow::from("destination"), Cow::from(""))))
296/// );
297/// ```
298/// Here some examples for link references:
299/// ```rst
300/// .. _Python home page: http://www.python.org
301/// .. _`Python: home page`: http://www.python.org
302/// ```
303/// See unit test `test_rst_label2dest()` for more examples.
304pub fn rst_label2dest(i: &str) -> nom::IResult<&str, (Cow<str>, Cow<str>, Cow<str>)> {
305    let (i, (l, d)) = rst_label2target(false, i)?;
306    Ok((i, (l, d, Cow::from(""))))
307}
308
309/// Wrapper around `rst_label2label()` that packs the result in
310/// `Link::Label2Label`.
311pub fn rst_label2label_link(i: &str) -> nom::IResult<&str, Link> {
312    let (i, (l1, l2)) = rst_label2label(i)?;
313    Ok((i, Link::Label2Label(l1, l2)))
314}
315
316/// Parse a reStructuredText _link reference to link reference definition_.
317/// This type defines an alias (alternative name) for a link reference:
318/// ```
319/// use parse_hyperlinks::parser::Link;
320/// use parse_hyperlinks::parser::restructured_text::rst_label2label;
321/// use std::borrow::Cow;
322///
323/// assert_eq!(
324///   rst_label2label("   .. _`alt label`: `label`_\nabc"),
325///   Ok(("\nabc", (Cow::from("alt label"), Cow::from("label"))))
326/// );
327/// assert_eq!(
328///   rst_label2label("   .. __: label_\nabc"),
329///   Ok(("\nabc", (Cow::from("_"), Cow::from("label"))))
330/// );
331/// assert_eq!(
332///   rst_label2label("   __ label_\nabc"),
333///   Ok(("\nabc", (Cow::from("_"), Cow::from("label"))))
334/// );
335/// ```
336pub fn rst_label2label(i: &str) -> nom::IResult<&str, (Cow<str>, Cow<str>)> {
337    rst_label2target(true, i)
338}
339
340/// Parser for _link_reference_definitions_:
341/// * `label==false`:  the link is of type `Label2Dest`
342/// * `label==true`: the link is of type `Label2Label`
343fn rst_label2target(label: bool, i: &str) -> nom::IResult<&str, (Cow<str>, Cow<str>)> {
344    let my_err = |_| {
345        nom::Err::Error(nom::error::Error::new(
346            i,
347            nom::error::ErrorKind::EscapedTransform,
348        ))
349    };
350
351    // If there is a block start? What kind of?
352    let (i, c, block_header_is__) =
353        if let (i, Some(c)) = nom::combinator::opt(rst_explicit_markup_block(".. "))(i)? {
354            (i, c, false)
355        } else {
356            let (i, c) = rst_explicit_markup_block("__ ")(i)?;
357            (i, c, true)
358        };
359
360    let (source, target) = match c {
361        Cow::Borrowed(s) => {
362            let (_, (ls, lt)) = if !block_header_is__ {
363                rst_parse_label2target(label)(s)?
364            } else if label {
365                // This is supposed to be a label.
366                ("", ("_", rst_parse_simple_label(s)?.1))
367            } else {
368                // This is supposed to be a destination (url).
369                ("", ("_", s))
370            };
371            // If the target is a destination (not a label), the last char must not be `_`.
372            if !label {
373                let _ = nom::combinator::not(rst_parse_simple_label)(lt).map_err(my_err)?;
374            };
375            (
376                rst_escaped_link_text_transform(ls)?.1,
377                rst_escaped_link_destination_transform(lt)?.1,
378            )
379        }
380
381        Cow::Owned(strg) => {
382            let (_, (ls, lt)) = if !block_header_is__ {
383                rst_parse_label2target(label)(&strg).map_err(my_err)?
384            } else if label {
385                // This is supposed to be a label.
386                let s = rst_parse_simple_label(&strg).map_err(my_err)?.1;
387                ("", ("_", s))
388            } else {
389                // This is supposed to be a destination (url).
390                ("", ("_", strg.as_str()))
391            };
392            // If the target is a destination (not a label), the last char must not be `_`.
393            if !label {
394                let _ = nom::combinator::not(rst_parse_simple_label)(lt).map_err(my_err)?;
395            };
396            let ls = Cow::Owned(
397                rst_escaped_link_text_transform(ls)
398                    .map_err(my_err)?
399                    .1
400                    .to_string(),
401            );
402            let lt = Cow::Owned(
403                rst_escaped_link_destination_transform(lt)
404                    .map_err(my_err)?
405                    .1
406                    .to_string(),
407            );
408            (ls, lt)
409        }
410    };
411
412    // We do not need to consume whitespace until the end of the line,
413    // because `rst_explicit_markup_block()` had stripped the whitespace
414    // already.
415
416    Ok((i, (source, target)))
417}
418
419/// The parser recognizes `Label2Dest` links (`label==false`):
420///     _label: dest
421/// or `Label2Label` links (`label==true):
422///     _alt_label: label_
423/// It does not perform any escape character transformation.
424fn rst_parse_label2target(label: bool) -> impl Fn(&str) -> IResult<&str, (&str, &str)> {
425    move |i: &str| {
426        let (i, link_text) = alt((
427            nom::sequence::delimited(
428                tag("_`"),
429                nom::bytes::complete::escaped(
430                    nom::character::complete::none_of(r#"\`"#),
431                    '\\',
432                    nom::character::complete::one_of(ESCAPABLE),
433                ),
434                tag("`: "),
435            ),
436            nom::sequence::delimited(
437                tag("_"),
438                nom::bytes::complete::escaped(
439                    nom::character::complete::none_of(r#"\:"#),
440                    '\\',
441                    nom::character::complete::one_of(ESCAPABLE),
442                ),
443                tag(": "),
444            ),
445            nom::combinator::value("_", tag("__: ")),
446        ))(i)?;
447
448        let link_target = if label {
449            // The target is another label.
450            rst_parse_simple_label(i)?.1
451        } else {
452            // The target is a destination.
453            i
454        };
455
456        Ok(("", (link_text, link_target)))
457    }
458}
459
460/// This parser consumes a simple label:
461///     one_word_label_
462/// or
463///     `more words label`_
464fn rst_parse_simple_label(i: &str) -> nom::IResult<&str, &str> {
465    // Consumes and returns a word ending with `_`.
466    // Strips off one the trailing `_` before returning the result.
467    fn take_word_consume_first_ending_underscore(i: &str) -> nom::IResult<&str, &str> {
468        let mut i = i;
469        let (k, mut r) = nom::bytes::complete::take_till1(|c: char| {
470            !(c.is_alphanumeric() || c == '-' || c == '_')
471        })(i)?;
472        // Is `r` ending with `__`? There should be at least 2 bytes: `"__".len()`
473        if r.len() >= 3 && r.is_char_boundary(r.len() - 2) && &r[r.len() - 2..] == "__" {
474            // Consume one `_`, but keep one `_` in remaining bytes.
475            i = &i[r.len() - 1..];
476            // Strip two `__` from result.
477            r = &r[..r.len() - 2];
478        // Is `r` ending with `_`? There should be at least 1 byte: `"_".len()`.
479        } else if !r.is_empty() && r.is_char_boundary(r.len() - 1) && &r[r.len() - 1..] == "_" {
480            // Remaining bytes.
481            i = k;
482            // Strip `_` from result.
483            r = &r[..r.len() - 1]
484        } else {
485            return Err(nom::Err::Error(nom::error::Error::new(
486                k,
487                nom::error::ErrorKind::Tag,
488            )));
489        };
490
491        Ok((i, r))
492    }
493
494    let (i, r) = nom::combinator::verify(
495        alt((
496            nom::sequence::delimited(
497                tag("`"),
498                nom::bytes::complete::escaped(
499                    nom::character::complete::none_of(r#"\`"#),
500                    '\\',
501                    nom::character::complete::one_of(ESCAPABLE),
502                ),
503                tag("`_"),
504            ),
505            take_word_consume_first_ending_underscore,
506        )),
507        |s: &str| s.len() <= LABEL_LEN_MAX,
508    )(i)?;
509
510    // Return error if label is empty.
511    let _ = nom::combinator::not(alt((nom::combinator::eof, tag("``"))))(r)?;
512
513    Ok((i, r))
514}
515
516/// This parses an explicit markup block.
517/// The parser expects to start at the beginning of the line.
518/// Syntax diagram:
519/// ```text
520/// +-------+----------------------+
521/// | ".. " | in  1                |
522/// +-------+ in  2                |
523///         |    in  3             |
524///         +----------------------+
525/// out
526/// ```
527/// An explicit markup block is a text block:
528/// * whose first line begins with ".." followed by whitespace (the "explicit
529///   markup start"),
530/// * whose second and subsequent lines (if any) are indented relative to the
531///   first, and
532/// * which ends before an unindented line
533/// As with external hyperlink targets, the link block of an indirect
534/// hyperlink target may begin on the same line as the explicit markup start
535/// or the next line. It may also be split over multiple lines, in which case
536/// the lines are joined with whitespace before being normalized.
537fn rst_explicit_markup_block<'a>(
538    block_header: &'a str,
539) -> impl Fn(&'a str) -> IResult<&'a str, Cow<'a, str>> {
540    move |i: &'a str| {
541        fn indent<'a>(wsp1: &'a str, wsp2: &'a str) -> impl Fn(&'a str) -> IResult<&'a str, ()> {
542            move |i: &str| {
543                let (i, _) = nom::character::complete::line_ending(i)?;
544                let (i, _) = nom::bytes::complete::tag(wsp1)(i)?;
545                let (i, _) = nom::bytes::complete::tag(wsp2)(i)?;
546                Ok((i, ()))
547            }
548        }
549
550        let (i, (wsp1, wsp2)) = nom::sequence::pair(
551            nom::character::complete::space0,
552            nom::combinator::map(nom::bytes::complete::tag(block_header), |_| "   "),
553        )(i)?;
554
555        let (j, v) = nom::multi::separated_list1(
556            indent(wsp1, wsp2),
557            nom::character::complete::not_line_ending,
558        )(i)?;
559
560        // If the block consists of only one line return now.
561        if v.len() == 1 {
562            return Ok((j, Cow::Borrowed(v[0])));
563        };
564
565        let mut s = String::new();
566        let mut is_first = true;
567
568        for subs in &v {
569            if !is_first {
570                s.push(' ');
571            }
572            s.push_str(subs);
573            is_first = false;
574        }
575
576        Ok((j, Cow::from(s)))
577    }
578}
579
580/// Replace the following escaped characters:
581///     \\\`\ \:\<\>
582/// with:
583///     \`:<>
584/// Preserves usual whitespace, but removes `\ `.
585fn rst_escaped_link_text_transform(i: &str) -> IResult<&str, Cow<str>> {
586    nom::combinator::map(
587        nom::bytes::complete::escaped_transform(
588            nom::bytes::complete::is_not("\\"),
589            '\\',
590            // This list is the same as `ESCAPABLE`.
591            alt((
592                tag("\\"),
593                tag("`"),
594                tag(":"),
595                tag("<"),
596                tag(">"),
597                tag("_"),
598                value("", tag(" ")),
599            )),
600        ),
601        |s| if s == i { Cow::from(i) } else { Cow::from(s) },
602    )(i)
603}
604
605/// Deletes all whitespace, but keeps one space for each `\ `.
606fn remove_whitespace(i: &str) -> IResult<&str, Cow<str>> {
607    let mut res = Cow::Borrowed("");
608    let mut j = i;
609    while !j.is_empty() {
610        let (k, _) = nom::character::complete::multispace0(j)?;
611        let (k, s) = nom::bytes::complete::escaped(
612            nom::character::complete::none_of("\\\r\n \t"),
613            '\\',
614            nom::character::complete::one_of(r#" :`<>\"#),
615        )(k)?;
616        res = match res {
617            Cow::Borrowed("") => Cow::Borrowed(s),
618            Cow::Borrowed(res_str) => {
619                let mut strg = res_str.to_string();
620                strg.push_str(s);
621                Cow::Owned(strg)
622            }
623            Cow::Owned(mut strg) => {
624                strg.push_str(s);
625                Cow::Owned(strg)
626            }
627        };
628        j = k;
629    }
630
631    Ok((j, res))
632}
633
634/// Replace the following escaped characters:
635///     \\\`\ \:\<\>
636/// with:
637///     \` :<>
638fn rst_escaped_link_destination_transform(i: &str) -> IResult<&str, Cow<str>> {
639    let my_err = |_| {
640        nom::Err::Error(nom::error::Error::new(
641            i,
642            nom::error::ErrorKind::EscapedTransform,
643        ))
644    };
645
646    let c = &*remove_whitespace(i)?.1;
647
648    let s = nom::bytes::complete::escaped_transform::<_, nom::error::Error<_>, _, _, _, _, _, _>(
649        nom::bytes::complete::is_not("\\"),
650        '\\',
651        nom::character::complete::one_of(ESCAPABLE),
652    )(c)
653    .map_err(my_err)?
654    .1;
655
656    // When nothing was changed we can continue with `Borrowed`.
657    if s == i {
658        Ok(("", Cow::Borrowed(i)))
659    } else {
660        Ok(("", Cow::Owned(s)))
661    }
662}
663
664#[cfg(test)]
665mod tests {
666    use super::*;
667    use nom::error::ErrorKind;
668
669    #[test]
670    fn test_rst_text2dest() {
671        let expected = (
672            "abc",
673            (
674                Cow::from("Python home page"),
675                Cow::from("http://www.python.org"),
676                Cow::from(""),
677            ),
678        );
679        assert_eq!(
680            rst_text2dest("`Python home page <http://www.python.org>`__abc").unwrap(),
681            expected
682        );
683
684        let expected = (
685            "abc",
686            (
687                Cow::from(r#"Python<home> page"#),
688                Cow::from("http://www.python.org"),
689                Cow::from(""),
690            ),
691        );
692        assert_eq!(
693            rst_text2dest(r#"`Python\ \<home\> page <http://www.python.org>`__abc"#).unwrap(),
694            expected
695        );
696
697        let expected = (
698            "abc",
699            (
700                Cow::from(r#"my news at <http://python.org>"#),
701                Cow::from("http://news.python.org"),
702                Cow::from(""),
703            ),
704        );
705        assert_eq!(
706            rst_text2dest(r#"`my news at \<http://python.org\> <http://news.python.org>`__abc"#)
707                .unwrap(),
708            expected
709        );
710
711        let expected = (
712            "abc",
713            (
714                Cow::from(r#"my news at <http://python.org>"#),
715                Cow::from(r#"http://news. <python>.org"#),
716                Cow::from(""),
717            ),
718        );
719        assert_eq!(
720            rst_text2dest(
721                r#"`my news at \<http\://python.org\> <http:// news.\ \<python\>.org>`__abc"#
722            )
723            .unwrap(),
724            expected
725        );
726    }
727
728    #[test]
729    fn test_rst_parse_text2dest_label() {
730        let expected = ("abc", ("Python home page", "http://www.python.org"));
731        assert_eq!(
732            rst_parse_text2target(false, false)("`Python home page <http://www.python.org>`_abc")
733                .unwrap(),
734            expected
735        );
736
737        let expected = nom::Err::Error(nom::error::Error::new("abc", ErrorKind::Tag));
738        assert_eq!(
739            rst_parse_text2target(false, false)("`Python home page <http://www.python.org_>`_abc")
740                .unwrap_err(),
741            expected
742        );
743
744        let expected = nom::Err::Error(nom::error::Error::new("", ErrorKind::Tag));
745        assert_eq!(
746            rst_parse_text2target(false, false)("`_abc").unwrap_err(),
747            expected
748        );
749
750        let expected = ("abc", ("Python home page", "http://www.python.org"));
751        assert_eq!(
752            rst_parse_text2target(true, false)("`Python home page <http://www.python.org>`__abc")
753                .unwrap(),
754            expected
755        );
756
757        let expected = ("abc", (r#"Python\ \<home\> page"#, "http://www.python.org"));
758        assert_eq!(
759            rst_parse_text2target(false, false)(
760                r#"`Python\ \<home\> page <http://www.python.org>`_abc"#
761            )
762            .unwrap(),
763            expected
764        );
765
766        let expected = (
767            "abc",
768            (
769                r#"my news at \<http://python.org\>"#,
770                "http://news.python.org",
771            ),
772        );
773        assert_eq!(
774            rst_parse_text2target(false, false)(
775                r#"`my news at \<http://python.org\> <http://news.python.org>`_abc"#
776            )
777            .unwrap(),
778            expected
779        );
780
781        let expected = (
782            "abc",
783            (
784                r#"my news at \<http\://python.org\>"#,
785                r#"http:// news.\ \<python\>.org"#,
786            ),
787        );
788        assert_eq!(
789            rst_parse_text2target(false, false)(
790                r#"`my news at \<http\://python.org\> <http:// news.\ \<python\>.org>`_abc"#
791            )
792            .unwrap(),
793            expected
794        );
795
796        let expected = (
797            "abc",
798            (
799                r#"my news at \<http\://python.org\>"#,
800                r#"http:// news.\ \<python\>.org"#,
801            ),
802        );
803        assert_eq!(
804            rst_parse_text2target(false, false)(
805                r#"`my news at \<http\://python.org\> <http:// news.\ \<python\>.org>`_abc"#
806            )
807            .unwrap(),
808            expected
809        );
810        let expected = ("abc", (r#"rst link text"#, "rst_link_label"));
811        assert_eq!(
812            rst_parse_text2target(false, true)(r#"`rst link text <rst_link_label_>`_abc"#).unwrap(),
813            expected
814        );
815
816        let expected = nom::Err::Error(nom::error::Error::new("abc", ErrorKind::Tag));
817        assert_eq!(
818            rst_parse_text2target(false, true)(r#"`my news <python webpage>`_abc"#).unwrap_err(),
819            expected
820        );
821    }
822
823    #[test]
824    fn test_rst_text2label() {
825        assert_eq!(
826            rst_text2label(r#"link_text_ abc"#),
827            Ok((" abc", (Cow::from("link_text"), Cow::from("link_text"))))
828        );
829        assert_eq!(
830            rst_text2label(r#"`li\:nk text`_ abc"#),
831            Ok((" abc", (Cow::from("li:nk text"), Cow::from("li:nk text"))))
832        );
833        assert_eq!(
834            rst_text2label("`link text`__ abc"),
835            Ok((" abc", (Cow::from("link text"), Cow::from("_"))))
836        );
837    }
838
839    #[test]
840    fn test_rst_parse_text2label() {
841        assert_eq!(
842            rst_parse_text2label("linktext_ abc"),
843            Ok((" abc", ("linktext", "linktext")))
844        );
845
846        assert_eq!(
847            rst_parse_text2label("linktext__ abc"),
848            Ok((" abc", ("linktext", "_")))
849        );
850
851        assert_eq!(
852            rst_parse_text2label("link_text_ abc"),
853            Ok((" abc", ("link_text", "link_text")))
854        );
855
856        assert_eq!(
857            rst_parse_text2label("`link text`_ abc"),
858            Ok((" abc", ("link text", "link text")))
859        );
860
861        assert_eq!(
862            rst_parse_text2label("`link text`_abc"),
863            Ok(("abc", ("link text", "link text")))
864        );
865
866        assert_eq!(
867            rst_parse_text2label("`link_text`_ abc"),
868            Ok((" abc", ("link_text", "link_text")))
869        );
870
871        assert_eq!(
872            rst_parse_text2label("`link text`__ abc"),
873            Ok((" abc", ("link text", "_")))
874        );
875
876        assert_eq!(
877            rst_parse_text2label("`link text<link label_>`_ abc"),
878            Ok((" abc", ("link text", "link label")))
879        );
880    }
881
882    #[test]
883    fn test_rst_label2dest() {
884        let expected = (
885            "\nabc",
886            (
887                Cow::from("Python: home page"),
888                Cow::from("http://www.python.org"),
889                Cow::from(""),
890            ),
891        );
892        assert_eq!(
893            rst_label2dest(".. _`Python: home page`: http://www.python.org\nabc").unwrap(),
894            expected
895        );
896        assert_eq!(
897            rst_label2dest("  .. _`Python: home page`: http://www.py\n     thon.org    \nabc")
898                .unwrap(),
899            expected
900        );
901
902        let expected = nom::Err::Error(nom::error::Error::new(
903            "x .. _`Python: home page`: http://www.python.org\nabc",
904            ErrorKind::Tag,
905        ));
906        assert_eq!(
907            rst_label2dest("x .. _`Python: home page`: http://www.python.org\nabc").unwrap_err(),
908            expected
909        );
910
911        let expected = (
912            "",
913            (
914                Cow::from("Python: `home page`"),
915                Cow::from("http://www.python .org"),
916                Cow::from(""),
917            ),
918        );
919        assert_eq!(
920            rst_label2dest(r#".. _Python\: \`home page\`: http://www.python\ .org"#).unwrap(),
921            expected
922        );
923        assert_eq!(
924            rst_label2dest(r#".. _`Python: \`home page\``: http://www.python\ .org"#).unwrap(),
925            expected
926        );
927
928        let expected = (
929            "",
930            (
931                Cow::from("my news at <http://python.org>"),
932                Cow::from("http://news.python.org"),
933                Cow::from(""),
934            ),
935        );
936        assert_eq!(
937            rst_label2dest(r#".. _`my news at <http://python.org>`: http://news.python.org"#)
938                .unwrap(),
939            expected
940        );
941        assert_eq!(
942            rst_label2dest(r#".. _`my news at \<http://python.org\>`: http://news.python.org"#)
943                .unwrap(),
944            expected
945        );
946        assert_eq!(
947            rst_label2dest(r#".. _my news at \<http\://python.org\>: http://news.python.org"#)
948                .unwrap(),
949            expected
950        );
951
952        let expected = (
953            "",
954            (
955                Cow::from("my news"),
956                Cow::from("http://news.<python>.org"),
957                Cow::from(""),
958            ),
959        );
960        assert_eq!(
961            rst_label2dest(r#".. _my news: http://news.<python>.org"#).unwrap(),
962            expected
963        );
964        assert_eq!(
965            rst_label2dest(r#".. _my news: http://news.\<python\>.org"#).unwrap(),
966            expected
967        );
968
969        let expected = (
970            "",
971            (
972                Cow::from("_"),
973                Cow::from("http://news.python.org"),
974                Cow::from(""),
975            ),
976        );
977        assert_eq!(
978            rst_label2dest(r#".. __: http://news.python.org"#).unwrap(),
979            expected
980        );
981        assert_eq!(
982            rst_label2dest(r#"__ http://news.python.org"#).unwrap(),
983            expected
984        );
985        assert_eq!(
986            rst_label2dest(".. _label: `link destination`_").unwrap_err(),
987            nom::Err::Error(nom::error::Error::new(
988                ".. _label: `link destination`_",
989                ErrorKind::EscapedTransform
990            )),
991        );
992        assert_eq!(
993            rst_label2dest("__ link_destination_").unwrap_err(),
994            nom::Err::Error(nom::error::Error::new(
995                "__ link_destination_",
996                ErrorKind::EscapedTransform
997            )),
998        );
999    }
1000
1001    #[test]
1002    fn test_rst_label2label() {
1003        assert_eq!(
1004            rst_label2label("   .. _`alt label`: `label`_\nabc"),
1005            Ok(("\nabc", (Cow::from("alt label"), Cow::from("label"))))
1006        );
1007        assert_eq!(
1008            rst_label2label("   .. __: label_\nabc"),
1009            Ok(("\nabc", (Cow::from("_"), Cow::from("label"))))
1010        );
1011        assert_eq!(
1012            rst_label2label("   __ label_\nabc"),
1013            Ok(("\nabc", (Cow::from("_"), Cow::from("label"))))
1014        );
1015        assert_eq!(
1016            rst_label2label("_label: label").unwrap_err(),
1017            nom::Err::Error(nom::error::Error::new("_label: label", ErrorKind::Tag)),
1018        );
1019        assert_eq!(
1020            rst_label2label("__ destination").unwrap_err(),
1021            nom::Err::Error(nom::error::Error::new("", ErrorKind::Tag)),
1022        );
1023    }
1024
1025    #[test]
1026    fn test_rst_parse_label2target() {
1027        let expected = ("", ("Python home page", "http://www.python.org"));
1028        assert_eq!(
1029            rst_parse_label2target(false)("_Python home page: http://www.python.org").unwrap(),
1030            expected
1031        );
1032        assert_eq!(
1033            rst_parse_label2target(false)("_`Python home page`: http://www.python.org").unwrap(),
1034            expected
1035        );
1036
1037        let expected = ("", ("Python: home page", "http://www.python.org"));
1038        assert_eq!(
1039            rst_parse_label2target(false)("_`Python: home page`: http://www.python.org").unwrap(),
1040            expected
1041        );
1042
1043        let expected = ("", (r#"Python\: home page"#, "http://www.python.org"));
1044        assert_eq!(
1045            rst_parse_label2target(false)(r#"_Python\: home page: http://www.python.org"#).unwrap(),
1046            expected
1047        );
1048
1049        let expected = (
1050            "",
1051            ("my news at <http://python.org>", "http://news.python.org"),
1052        );
1053        assert_eq!(
1054            rst_parse_label2target(false)(
1055                r#"_`my news at <http://python.org>`: http://news.python.org"#
1056            )
1057            .unwrap(),
1058            expected
1059        );
1060
1061        let expected = (
1062            "",
1063            (
1064                r#"my news at \<http://python.org\>"#,
1065                "http://news.python.org",
1066            ),
1067        );
1068        assert_eq!(
1069            rst_parse_label2target(false)(
1070                r#"_`my news at \<http://python.org\>`: http://news.python.org"#
1071            )
1072            .unwrap(),
1073            expected
1074        );
1075
1076        let expected = (
1077            "",
1078            (
1079                r#"my news at \<http\://python.org\>"#,
1080                "http://news.python.org",
1081            ),
1082        );
1083        assert_eq!(
1084            rst_parse_label2target(false)(
1085                r#"_my news at \<http\://python.org\>: http://news.python.org"#
1086            )
1087            .unwrap(),
1088            expected
1089        );
1090
1091        let expected = ("", ("_", "http://news.python.org"));
1092        assert_eq!(
1093            rst_parse_label2target(false)(r#"__: http://news.python.org"#).unwrap(),
1094            expected
1095        );
1096
1097        let expected = ("", ("alt_label", "one_word_label"));
1098        assert_eq!(
1099            rst_parse_label2target(true)("_alt_label: one_word_label_").unwrap(),
1100            expected
1101        );
1102
1103        let expected = ("", ("alt label", "more words label"));
1104        assert_eq!(
1105            rst_parse_label2target(true)("_`alt label`: `more words label`_").unwrap(),
1106            expected
1107        );
1108    }
1109
1110    #[test]
1111    fn test_parse_simple_label() {
1112        let expected = ("", "one_word_label");
1113        assert_eq!(rst_parse_simple_label("one_word_label_").unwrap(), expected);
1114
1115        let expected = (" abc", "one_word_label");
1116        assert_eq!(
1117            rst_parse_simple_label("one_word_label_ abc").unwrap(),
1118            expected
1119        );
1120        assert_eq!(
1121            rst_parse_simple_label("`one_word_label`_ abc").unwrap(),
1122            expected
1123        );
1124
1125        let expected = ("", "more words label");
1126        assert_eq!(
1127            rst_parse_simple_label("`more words label`_").unwrap(),
1128            expected
1129        );
1130
1131        let expected = (". abc", "more words label");
1132        assert_eq!(
1133            rst_parse_simple_label("`more words label`_. abc").unwrap(),
1134            expected
1135        );
1136
1137        let expected = ("? abc", "more words label");
1138        assert_eq!(
1139            rst_parse_simple_label("`more words label`_? abc").unwrap(),
1140            expected
1141        );
1142
1143        let expected = (" abc", "more words label");
1144        assert_eq!(
1145            rst_parse_simple_label("`more words label`_ abc").unwrap(),
1146            expected
1147        );
1148
1149        assert_eq!(
1150            rst_parse_simple_label("_").unwrap_err(),
1151            nom::Err::Error(nom::error::Error::new("", ErrorKind::Not)),
1152        );
1153
1154        assert_eq!(
1155            rst_parse_simple_label("``_").unwrap_err(),
1156            nom::Err::Error(nom::error::Error::new("``_", ErrorKind::TakeTill1)),
1157        );
1158    }
1159
1160    #[test]
1161    fn test_rst_explicit_markup_block() {
1162        assert_eq!(
1163            rst_explicit_markup_block(".. ")(".. 11111"),
1164            Ok(("", Cow::from("11111")))
1165        );
1166        assert_eq!(
1167            rst_explicit_markup_block(".. ")("   .. 11111\nout"),
1168            Ok(("\nout", Cow::from("11111")))
1169        );
1170        assert_eq!(
1171            rst_explicit_markup_block(".. ")("   .. 11111\n      222222\n      333333\nout"),
1172            Ok(("\nout", Cow::from("11111 222222 333333")))
1173        );
1174        assert_eq!(
1175            rst_explicit_markup_block(".. ")("   .. first\n      second\n       1indent\nout"),
1176            Ok(("\nout", Cow::from("first second  1indent")))
1177        );
1178        assert_eq!(
1179            rst_explicit_markup_block(".. ")("   ..first"),
1180            Err(nom::Err::Error(nom::error::Error::new(
1181                "..first",
1182                ErrorKind::Tag
1183            )))
1184        );
1185        assert_eq!(
1186            rst_explicit_markup_block(".. ")("x  .. first"),
1187            Err(nom::Err::Error(nom::error::Error::new(
1188                "x  .. first",
1189                ErrorKind::Tag
1190            )))
1191        );
1192    }
1193
1194    #[test]
1195    fn test_rst_escaped_link_text_transform() {
1196        assert_eq!(rst_escaped_link_text_transform(""), Ok(("", Cow::from(""))));
1197        // Different than the link destination version.
1198        assert_eq!(
1199            rst_escaped_link_text_transform("   "),
1200            Ok(("", Cow::from("   ")))
1201        );
1202        // Different than the link destination version.
1203        assert_eq!(
1204            rst_escaped_link_text_transform(r#"\ \ \ "#),
1205            Ok(("", Cow::from("")))
1206        );
1207        assert_eq!(
1208            rst_escaped_link_text_transform(r#"abc`:<>abc"#),
1209            Ok(("", Cow::from(r#"abc`:<>abc"#)))
1210        );
1211        assert_eq!(
1212            rst_escaped_link_text_transform(r#"\:\`\<\>\\"#),
1213            Ok(("", Cow::from(r#":`<>\"#)))
1214        );
1215    }
1216
1217    #[test]
1218    fn test_rst_escaped_link_destination_transform() {
1219        assert_eq!(
1220            rst_escaped_link_destination_transform(""),
1221            Ok(("", Cow::Borrowed("")))
1222        );
1223        // Different than the link name version.
1224        assert_eq!(
1225            rst_escaped_link_destination_transform("  "),
1226            Ok(("", Cow::Borrowed("")))
1227        );
1228        assert_eq!(
1229            rst_escaped_link_destination_transform(" x x"),
1230            Ok(("", Cow::Owned("xx".to_string())))
1231        );
1232        // Different than the link name version.
1233        assert_eq!(
1234            rst_escaped_link_destination_transform(r#"\ \ \ "#),
1235            Ok(("", Cow::Owned("   ".to_string())))
1236        );
1237        assert_eq!(
1238            rst_escaped_link_destination_transform(r#"abc`:<>abc"#),
1239            Ok(("", Cow::Borrowed(r#"abc`:<>abc"#)))
1240        );
1241        assert_eq!(
1242            rst_escaped_link_destination_transform(r#"\:\`\<\>\\"#),
1243            Ok(("", Cow::Owned(r#":`<>\"#.to_string())))
1244        );
1245    }
1246    #[test]
1247    fn test_remove_whitespace() {
1248        assert_eq!(remove_whitespace(" abc "), Ok(("", Cow::Borrowed("abc"))));
1249        assert_eq!(
1250            remove_whitespace(" x x"),
1251            Ok(("", Cow::Owned("xx".to_string())))
1252        );
1253        assert_eq!(remove_whitespace("  \t \r \n"), Ok(("", Cow::from(""))));
1254        assert_eq!(
1255            remove_whitespace(r#"\ \ \ "#),
1256            Ok(("", Cow::Borrowed(r#"\ \ \ "#)))
1257        );
1258        assert_eq!(
1259            remove_whitespace(r#"abc`:<>abc"#),
1260            Ok(("", Cow::Borrowed(r#"abc`:<>abc"#)))
1261        );
1262        assert_eq!(
1263            remove_whitespace(r#"\:\`\<\>\\"#),
1264            Ok(("", Cow::Borrowed(r#"\:\`\<\>\\"#)))
1265        );
1266
1267        assert_eq!(
1268            remove_whitespace("http://www.py\n     thon.org"),
1269            Ok(("", Cow::Owned("http://www.python.org".to_string())))
1270        );
1271    }
1272}