markdown_includes/rustdoc_parse/transform/intralinks/
links.rs

1/* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
4 */
5
6use crate::rustdoc_parse::transform::intralinks::ItemPath;
7use crate::rustdoc_parse::utils::{MarkdownItemIterator, Span};
8use pulldown_cmark::CowStr;
9use std::fmt;
10use std::fmt::Display;
11use unicase::UniCase;
12
13#[derive(Eq, PartialEq, Clone, Debug)]
14pub struct MarkdownReferenceLinkDefinition {
15    pub label: UniCase<String>,
16    pub link: Link,
17    pub raw_title: Option<String>,
18}
19
20impl MarkdownReferenceLinkDefinition {
21    fn new(
22        label: String,
23        link: Link,
24        raw_title: Option<String>,
25    ) -> MarkdownReferenceLinkDefinition {
26        MarkdownReferenceLinkDefinition {
27            label: UniCase::unicode(label),
28            link,
29            raw_title,
30        }
31    }
32
33    pub fn with_link(&self, link: Link) -> MarkdownReferenceLinkDefinition {
34        MarkdownReferenceLinkDefinition {
35            label: self.label.clone(),
36            link,
37            raw_title: self.raw_title.clone(),
38        }
39    }
40}
41
42impl Display for MarkdownReferenceLinkDefinition {
43    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
44        let MarkdownReferenceLinkDefinition {
45            label: key,
46            link,
47            raw_title,
48        } = self;
49
50        match raw_title {
51            None => write!(f, "[{key}]: {link}"),
52            Some(title) => write!(f, "[{key}]: {link} {title}"),
53        }
54    }
55}
56
57#[derive(Eq, PartialEq, Clone, Debug)]
58pub enum MarkdownLink {
59    Inline { link: MarkdownInlineLink },
60    Reference { link: MarkdownReferenceLink },
61}
62
63#[derive(Eq, PartialEq, Clone, Debug)]
64pub struct MarkdownInlineLink {
65    pub text: String,
66    pub link: Link,
67}
68
69impl MarkdownInlineLink {
70    pub fn with_link(&self, link: Link) -> MarkdownInlineLink {
71        MarkdownInlineLink {
72            text: self.text.clone(),
73            link,
74        }
75    }
76}
77
78impl Display for MarkdownInlineLink {
79    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
80        write!(f, "[{}]({})", self.text, self.link)
81    }
82}
83
/// The destination of a markdown link, kept exactly as it was written in the source.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Link {
    /// The unprocessed destination text.
    pub raw_link: String,
}
88
89impl Link {
90    pub fn link_as_item_path(&self) -> Option<ItemPath> {
91        let link = self.split_link_fragment().0;
92
93        ItemPath::from_string(link)
94    }
95
96    fn split_link_fragment(&self) -> (&str, &str) {
97        fn strip_last_backtick(strip_backtick_end: bool, s: &str) -> &str {
98            match strip_backtick_end {
99                true => s.strip_suffix('`').unwrap_or(s),
100                false => s,
101            }
102        }
103
104        let strip_backtick_end: bool = self.raw_link.starts_with('`');
105        let link = self.raw_link.strip_prefix('`').unwrap_or(&self.raw_link);
106
107        match link.find('#') {
108            None => (strip_last_backtick(strip_backtick_end, link), ""),
109            Some(i) => {
110                let (l, f) = link.split_at(i);
111                (
112                    strip_last_backtick(strip_backtick_end, l),
113                    strip_last_backtick(strip_backtick_end, f),
114                )
115            }
116        }
117    }
118
119    pub fn link_fragment(&self) -> Option<&str> {
120        match self.split_link_fragment().1 {
121            "" => None,
122            f => Some(f),
123        }
124    }
125}
126
127impl From<String> for Link {
128    fn from(raw_link: String) -> Link {
129        Link { raw_link }
130    }
131}
132
133impl From<&str> for Link {
134    fn from(raw_link: &str) -> Link {
135        Link {
136            raw_link: raw_link.to_owned(),
137        }
138    }
139}
140
141impl Display for Link {
142    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
143        self.raw_link.fmt(f)
144    }
145}
146
147#[derive(Eq, PartialEq, Clone, Debug)]
148pub enum MarkdownReferenceLink {
149    Normal {
150        text: String,
151        label: UniCase<String>,
152    },
153    Shortcut {
154        text: UniCase<String>,
155    },
156}
157
158impl MarkdownReferenceLink {
159    fn new(text: String, label: String) -> MarkdownReferenceLink {
160        MarkdownReferenceLink::Normal {
161            text,
162            label: UniCase::unicode(label),
163        }
164    }
165
166    fn new_shortcut(text: String) -> MarkdownReferenceLink {
167        MarkdownReferenceLink::Shortcut {
168            text: UniCase::unicode(text),
169        }
170    }
171
172    pub fn text(&self) -> &str {
173        match self {
174            MarkdownReferenceLink::Normal { text, .. } => text,
175            MarkdownReferenceLink::Shortcut { text } => text.as_str(),
176        }
177    }
178
179    pub fn label(&self) -> &UniCase<String> {
180        match self {
181            MarkdownReferenceLink::Normal { label, .. } => label,
182            MarkdownReferenceLink::Shortcut { text } => text,
183        }
184    }
185}
186
187impl Display for MarkdownReferenceLink {
188    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
189        match self {
190            MarkdownReferenceLink::Normal { text, label } => write!(f, "[{}][{}]", text, label),
191            MarkdownReferenceLink::Shortcut { text: label } => write!(f, "[{}]", label),
192        }
193    }
194}
195
196pub fn markdown_link_iterator(source: &str) -> MarkdownItemIterator<MarkdownLink> {
197    use pulldown_cmark::{Event, LinkType, Options, Parser, Tag};
198
199    // We need to define a callback for broken links so that we can see broken links so that we can
200    // strip them.
201    let mut broken_link_callback = |_| {
202        Some((
203            CowStr::Borrowed("fake://link"),
204            CowStr::Borrowed("fake title"),
205        ))
206    };
207    let parser = Parser::new_with_broken_link_callback(
208        source,
209        Options::all(),
210        Some(&mut broken_link_callback),
211    );
212
213    let mut in_link = false;
214    let mut start_text = 0;
215    let mut end_text = 0;
216
217    let iter = parser
218        .into_offset_iter()
219        .filter_map(move |(event, range)| match event {
220            Event::Start(Tag::Link(
221                LinkType::Inline
222                | LinkType::Reference
223                | LinkType::Shortcut
224                | LinkType::ReferenceUnknown
225                | LinkType::ShortcutUnknown,
226                ..,
227            )) => {
228                in_link = true;
229                start_text = range.start + 1;
230                end_text = range.end;
231                None
232            }
233            Event::End(Tag::Link(LinkType::Inline, ..)) => {
234                in_link = false;
235
236                let text = source[start_text..end_text].to_owned();
237                let link = source[(end_text + 2)..(range.end - 1)].to_owned().into();
238
239                let link = MarkdownLink::Inline {
240                    link: MarkdownInlineLink { text, link },
241                };
242
243                Some((range.into(), link))
244            }
245            Event::End(Tag::Link(LinkType::Reference | LinkType::ReferenceUnknown, ..)) => {
246                in_link = false;
247
248                let text = source[start_text..end_text].to_owned();
249                let label = source[(end_text + 2)..(range.end - 1)].to_owned();
250
251                let link = MarkdownLink::Reference {
252                    link: MarkdownReferenceLink::new(text, label),
253                };
254
255                Some((range.into(), link))
256            }
257            Event::End(Tag::Link(LinkType::Shortcut | LinkType::ShortcutUnknown, ..)) => {
258                in_link = false;
259
260                let text = source[start_text..end_text].to_owned();
261
262                let link = MarkdownLink::Reference {
263                    link: MarkdownReferenceLink::new_shortcut(text),
264                };
265
266                Some((range.into(), link))
267            }
268            _ => {
269                if in_link {
270                    end_text = range.end;
271                }
272
273                None
274            }
275        });
276
277    // Unfortunately we need to collect the iterator here, because the parser references
278    // `broken_link_callback` which is in the local stack.
279    let collected = iter.collect::<Vec<_>>();
280
281    MarkdownItemIterator::new(source, collected.into_iter())
282}
283
284fn parse_raw_reference_link_definition(
285    label: &str,
286    raw_ref_def: &str,
287) -> Option<MarkdownReferenceLinkDefinition> {
288    // We need to parse things manually here, because the pulldown-cmark parser escapes the title
289    // and the link.  We need the raw version to emit them later.
290
291    let Some(link_and_title) = raw_ref_def.get(label.len() + 3..).map(str::trim) else {
292        return None;
293    };
294
295    assert_eq!(
296        raw_ref_def.get(label.len() + 1..label.len() + 3),
297        Some("]:"),
298        "This should never happen, but if it does please report it as a bug.",
299    );
300
301    let (link, title) = match link_and_title.find(char::is_whitespace) {
302        None => (link_and_title, None),
303        Some(i) => {
304            let (link, title) = link_and_title.split_at(i);
305            let title = match title.trim() {
306                "" => None,
307                title => Some(title),
308            };
309
310            (link.trim(), title)
311        }
312    };
313
314    let link = MarkdownReferenceLinkDefinition::new(
315        label.to_owned(),
316        link.into(),
317        title.map(ToOwned::to_owned),
318    );
319
320    Some(link)
321}
322
323pub fn markdown_reference_link_definition_iterator(
324    source: &str,
325) -> MarkdownItemIterator<MarkdownReferenceLinkDefinition> {
326    use pulldown_cmark::{Options, Parser};
327
328    let parser = Parser::new_ext(source, Options::all());
329
330    let mut link_defs: Vec<(Span, MarkdownReferenceLinkDefinition)> = parser
331        .reference_definitions()
332        .iter()
333        .filter_map(|(label, ref_def)| {
334            let raw_ref_def = &source[ref_def.span.clone()];
335
336            parse_raw_reference_link_definition(label, raw_ref_def)
337                .map(|link| (Span::from(ref_def.span.clone()), link))
338        })
339        .collect::<Vec<_>>();
340
341    // `Parser::reference_definitions()` does not traverse the definitions in order. But
342    // `MarkdownItemIterator::complete()` expects the iterated-over spans to be in order.
343    link_defs.sort_by_key(|(span, _)| span.clone());
344
345    MarkdownItemIterator::new(source, link_defs.into_iter())
346}
347
#[cfg(test)]
mod tests {
    use super::*;
    use indoc::indoc;

    /// Takes the next item from `iter` and checks both its value and the slice of `source`
    /// covered by its span.
    #[track_caller]
    fn assert_next<T>(
        iter: &mut impl Iterator<Item = (Span, T)>,
        source: &str,
        expected: T,
        expected_raw: &str,
    ) where
        T: PartialEq + std::fmt::Debug,
    {
        let (Span { start, end }, item) = iter.next().unwrap();

        assert_eq!(item, expected);
        assert_eq!(&source[start..end], expected_raw);
    }

    /// Builds the expected value for an inline link `[text](link)`.
    fn inline(text: &str, link: &str) -> MarkdownLink {
        MarkdownLink::Inline {
            link: MarkdownInlineLink {
                text: text.to_owned(),
                link: link.into(),
            },
        }
    }

    /// Builds the expected value for a reference link `[text][label]`.
    fn reference(text: &str, label: &str) -> MarkdownLink {
        MarkdownLink::Reference {
            link: MarkdownReferenceLink::new(text.to_owned(), label.to_owned()),
        }
    }

    /// Builds the expected value for a shortcut reference link `[text]`.
    fn shortcut(text: &str) -> MarkdownLink {
        MarkdownLink::Reference {
            link: MarkdownReferenceLink::new_shortcut(text.to_owned()),
        }
    }

    /// Builds the expected value for a reference link definition.
    fn definition(label: &str, link: &str, title: Option<&str>) -> MarkdownReferenceLinkDefinition {
        MarkdownReferenceLinkDefinition::new(
            label.to_owned(),
            link.into(),
            title.map(ToOwned::to_owned),
        )
    }

    #[test]
    fn test_markdown_link_iterator() {
        let source = "A [some text] [another](http://foo.com), [another][one]";
        let mut iter = markdown_link_iterator(source).items_with_spans();

        assert_next(&mut iter, source, shortcut("some text"), "[some text]");
        assert_next(
            &mut iter,
            source,
            inline("another", "http://foo.com"),
            "[another](http://foo.com)",
        );
        assert_next(
            &mut iter,
            source,
            reference("another", "one"),
            "[another][one]",
        );
        assert_eq!(iter.next(), None);
    }

    #[test]
    fn test_markdown_link_iterator_adjacent_links() {
        let source = "[another](http://foo.com)[another][one]";
        let mut iter = markdown_link_iterator(source).items_with_spans();

        assert_next(
            &mut iter,
            source,
            inline("another", "http://foo.com"),
            "[another](http://foo.com)",
        );
        assert_next(
            &mut iter,
            source,
            reference("another", "one"),
            "[another][one]",
        );
        assert_eq!(iter.next(), None);
    }

    #[test]
    fn test_markdown_link_iterator_nested_brackets() {
        let source = "A [some [text]], [another [text2] (foo)](http://foo.com/foo(bar)), [another [] one][foo[]bar]";
        let mut iter = markdown_link_iterator(source).items_with_spans();

        assert_next(&mut iter, source, shortcut("text"), "[text]");
        assert_next(&mut iter, source, shortcut("text2"), "[text2]");
        assert_eq!(iter.next(), None);
    }

    #[test]
    fn test_markdown_link_iterator_escaped_brackets() {
        let source = "A [some \\]text], [another](http://foo.\\(com\\)), [another\\]][one\\]]";
        let mut iter = markdown_link_iterator(source).items_with_spans();

        assert_next(
            &mut iter,
            source,
            shortcut(r"some \]text"),
            r"[some \]text]",
        );
        assert_next(
            &mut iter,
            source,
            inline("another", r"http://foo.\(com\)"),
            r"[another](http://foo.\(com\))",
        );
        assert_next(
            &mut iter,
            source,
            reference(r"another\]", r"one\]"),
            r"[another\]][one\]]",
        );
        assert_eq!(iter.next(), None);
    }

    #[test]
    fn test_markdown_link_iterator_ignores_code() {
        let source = "A `this is no link [link](http://foo.com)`";
        let mut iter = markdown_link_iterator(source).items_with_spans();

        assert_eq!(iter.next(), None);

        let source = "A\n```\nthis is no link [link](http://foo.com)\n```";
        let mut iter = markdown_link_iterator(source).items_with_spans();

        assert_eq!(iter.next(), None);
    }

    #[test]
    fn test_markdown_link_iterator_code_in_link_text() {
        let source = "A [link with `code`!](http://foo.com)!";
        let mut iter = markdown_link_iterator(source).items_with_spans();

        assert_next(
            &mut iter,
            source,
            inline("link with `code`!", "http://foo.com"),
            "[link with `code`!](http://foo.com)",
        );
        assert_eq!(iter.next(), None);
    }

    #[test]
    fn test_markdown_link_iterator_with_definitions() {
        let doc = indoc! { r#"
            Here a link with [some text] and [another][one].

            [some text]: https://en.wikipedia.org/wiki/Markdown
            [one]: https://en.wikipedia.org/wiki/Markdown
            "#
        };

        let mut iter = markdown_link_iterator(doc).items_with_spans();

        assert_next(&mut iter, doc, shortcut("some text"), "[some text]");
        assert_next(
            &mut iter,
            doc,
            reference("another", "one"),
            "[another][one]",
        );
        assert_eq!(iter.next(), None);
    }

    #[test]
    fn test_markdown_reference_link_definition_iterator() {
        let doc = indoc! { r#"
            [ref]: https://en.wikipedia.org/wiki/Markdown "This is the title"
            [Another ref]: https://en.wikipedia.org/wiki/Markdown
            [And another one]: https://en.wikipedia.org/wiki/Markdown "Title with &amp;"
            [spaces]:    https://en.wikipedia.org/wiki/Markdown    " Title  with  spaces  "
            "#
        };

        let mut iter = markdown_reference_link_definition_iterator(doc).items_with_spans();

        assert_next(
            &mut iter,
            doc,
            definition(
                "ref",
                "https://en.wikipedia.org/wiki/Markdown",
                Some("\"This is the title\""),
            ),
            "[ref]: https://en.wikipedia.org/wiki/Markdown \"This is the title\"",
        );
        assert_next(
            &mut iter,
            doc,
            definition(
                "Another ref",
                "https://en.wikipedia.org/wiki/Markdown",
                None,
            ),
            "[Another ref]: https://en.wikipedia.org/wiki/Markdown",
        );
        assert_next(
            &mut iter,
            doc,
            definition(
                "And another one",
                "https://en.wikipedia.org/wiki/Markdown",
                Some("\"Title with &amp;\""),
            ),
            "[And another one]: https://en.wikipedia.org/wiki/Markdown \"Title with &amp;\"",
        );
        assert_next(
            &mut iter,
            doc,
            definition(
                "spaces",
                "https://en.wikipedia.org/wiki/Markdown",
                Some("\" Title  with  spaces  \""),
            ),
            "[spaces]:    https://en.wikipedia.org/wiki/Markdown    \" Title  with  spaces  \"",
        );
        assert_eq!(iter.next(), None);
    }
}