Skip to main content

dmc_transform/builtin/
bare_url.rs

1use crate::pipeline::Transformer;
2use crate::visit::{NodeAction, Visitor, walk_root};
3use dmc_diagnostic::Code;
4use dmc_diagnostic::metadata::SourceMeta;
5use dmc_parser::ast::*;
6
/// Transformer that turns bare `http://` / `https://` URLs found inside
/// `Text` nodes into synthesised `Link` nodes.
///
/// Only the direct children of `Paragraph`, `Heading`, `Bold`, `Italic` and
/// `Strikethrough` nodes are scanned.
#[derive(Debug, Default, Clone, Copy)]
pub struct BareUrlAutolink;
11
12impl Transformer for BareUrlAutolink {
13  fn name(&self) -> &str {
14    "bare-url-autolink"
15  }
16  fn transform(
17    &self,
18    doc: &mut Document,
19    _meta: &SourceMeta,
20    _diag_engine: &mut duck_diagnostic::DiagnosticEngine<Code>,
21  ) {
22    let mut v = Apply;
23    walk_root(&mut doc.children, &mut v);
24  }
25}
26
27struct Apply;
28
29impl Apply {
30  /// Expand any `Text` whose value contains a URL into `[Text, Link, Text,
31  /// ...]` pieces. Non-Text nodes pass through.
32  fn rewrite_children(nodes: Vec<Node>) -> Vec<Node> {
33    let mut out = Vec::new();
34    for n in nodes {
35      if let Node::Text(t) = &n {
36        let pieces = Self::split_by_url(&t.value);
37        if pieces.len() == 1 {
38          out.push(n.clone());
39          continue;
40        }
41        let span = t.span.clone();
42        for piece in pieces {
43          match piece {
44            Piece::Text(s) if !s.is_empty() => out.push(Node::Text(Text { value: s, span: span.clone() })),
45            Piece::Text(_) => {},
46            Piece::Url(url) => out.push(Node::Link(Link {
47              href: url.clone(),
48              title: None,
49              children: vec![Node::Text(Text { value: url, span: span.clone() })],
50              span: span.clone(),
51            })),
52          }
53        }
54      } else {
55        out.push(n);
56      }
57    }
58    out
59  }
60
61  /// Split `s` into alternating `Text` / `Url` pieces around `http(s)://`
62  /// runs. URL boundary is whitespace, `)`, `<`, or `>`.
63  fn split_by_url(s: &str) -> Vec<Piece> {
64    let mut out = Vec::new();
65    let mut rest = s;
66    while let Some(idx) = rest.find("http") {
67      let before = &rest[..idx];
68      let after = &rest[idx..];
69      if !(after.starts_with("http://") || after.starts_with("https://")) {
70        out.push(Piece::Text(format!("{}{}", before, &rest[idx..idx + 1])));
71        rest = &rest[idx + 1..];
72        continue;
73      }
74      let url_end = after.find(|c: char| c.is_whitespace() || c == ')' || c == '<' || c == '>').unwrap_or(after.len());
75      let url = &after[..url_end];
76      if !before.is_empty() {
77        out.push(Piece::Text(before.to_string()));
78      }
79      out.push(Piece::Url(url.to_string()));
80      rest = &after[url_end..];
81    }
82    if !rest.is_empty() {
83      out.push(Piece::Text(rest.to_string()));
84    }
85    if out.is_empty() {
86      out.push(Piece::Text(String::new()));
87    }
88    out
89  }
90}
91
92impl Visitor for Apply {
93  fn visit_node(&mut self, node: &mut Node) -> NodeAction {
94    match node {
95      Node::Paragraph(p) => p.children = Self::rewrite_children(std::mem::take(&mut p.children)),
96      Node::Heading(h) => h.children = Self::rewrite_children(std::mem::take(&mut h.children)),
97      Node::Bold(i) | Node::Italic(i) | Node::Strikethrough(i) => {
98        i.children = Self::rewrite_children(std::mem::take(&mut i.children));
99      },
100      _ => {},
101    }
102    NodeAction::Keep
103  }
104}
105
/// One fragment of a text value after it has been split around URLs.
#[derive(Debug, Clone, PartialEq, Eq)]
enum Piece {
  /// Plain text that is kept as text.
  Text(String),
  /// A bare `http://` or `https://` URL, stored verbatim.
  Url(String),
}