dmc_transform/builtin/
bare_url.rs1use crate::pipeline::Transformer;
4use crate::visit::{NodeAction, Visitor, walk_root};
5use dmc_diagnostic::Code;
6use dmc_diagnostic::metadata::SourceMeta;
7use dmc_parser::ast::*;
8
/// Transformer that turns bare URLs appearing in inline text into links.
///
/// The type carries no state; all work happens in its `Transformer`
/// implementation.
#[derive(Debug, Default, Clone, Copy)]
pub struct BareUrlAutolink;
13
14impl Transformer for BareUrlAutolink {
15 fn name(&self) -> &str {
16 "bare-url-autolink"
17 }
18 fn transform(
19 &self,
20 doc: &mut Document,
21 _meta: &SourceMeta,
22 _diag_engine: &mut duck_diagnostic::DiagnosticEngine<Code>,
23 ) {
24 let mut v = Apply;
25 walk_root(&mut doc.children, &mut v);
26 }
27}
28
29struct Apply;
30
31impl Apply {
32 fn rewrite_children(nodes: Vec<Node>) -> Vec<Node> {
35 let mut out = Vec::new();
36 for n in nodes {
37 if let Node::Text(t) = &n {
38 let pieces = Self::split_by_url(&t.value);
39 let any_url = pieces.iter().any(|p| matches!(p, Piece::Url(_) | Piece::Display(_)));
43 if !any_url {
44 out.push(n.clone());
45 continue;
46 }
47 let span = t.span.clone();
48 let mut iter = pieces.into_iter().peekable();
49 while let Some(piece) = iter.next() {
50 match piece {
51 Piece::Text(s) if !s.is_empty() => out.push(Node::Text(Text { value: s, span: span.clone() })),
52 Piece::Text(_) => {},
53 Piece::Url(href) => {
54 let display = match iter.peek() {
55 Some(Piece::Display(_)) => match iter.next() {
56 Some(Piece::Display(d)) => d,
57 _ => href.clone(),
58 },
59 _ => href.clone(),
60 };
61 out.push(Node::Link(Link {
62 href,
63 title: None,
64 children: vec![Node::Text(Text { value: display, span: span.clone() })],
65 span: span.clone(),
66 }));
67 },
68 Piece::Display(d) => {
69 if !d.is_empty() {
71 out.push(Node::Text(Text { value: d, span: span.clone() }));
72 }
73 },
74 }
75 }
76 } else {
77 out.push(n);
78 }
79 }
80 out
81 }
82
83 fn split_by_url(s: &str) -> Vec<Piece> {
89 fn next_url_match(rest: &str) -> Option<(usize, &'static str)> {
90 let bytes = rest.as_bytes();
94 let mut best: Option<(usize, &'static str)> = None;
95 for prefix in ["http://", "https://", "www."] {
96 if let Some(idx) = rest.find(prefix) {
97 let ok_boundary =
98 idx == 0 || matches!(bytes.get(idx - 1).copied(), Some(b) if !b.is_ascii_alphanumeric() && b != b'_');
99 if !ok_boundary {
100 continue;
101 }
102 if best.is_none_or(|(b, _)| idx < b) {
103 best = Some((idx, prefix));
104 }
105 }
106 }
107 best
108 }
109 fn url_body_end(after: &str) -> usize {
110 after.find(|c: char| c.is_whitespace() || c == '<').unwrap_or(after.len())
111 }
112 fn trim_trailing(s: &str) -> (&str, &str) {
113 let bytes = s.as_bytes();
114 let mut end = bytes.len();
115 loop {
116 if end == 0 {
117 break;
118 }
119 let last = bytes[end - 1];
120 if matches!(last, b'?' | b'!' | b'.' | b',' | b':' | b'*' | b'_' | b'~') {
122 end -= 1;
123 continue;
124 }
125 if last == b')' {
128 let opens = bytes[..end].iter().filter(|&&b| b == b'(').count();
129 let closes = bytes[..end].iter().filter(|&&b| b == b')').count();
130 if closes > opens {
131 end -= 1;
132 continue;
133 }
134 }
135 if last == b';'
138 && let Some(amp) = bytes[..end - 1].iter().rposition(|&b| b == b'&')
139 {
140 let inner = &bytes[amp + 1..end - 1];
141 if !inner.is_empty() && inner.iter().all(|&b| b.is_ascii_alphanumeric()) {
142 end = amp;
143 continue;
144 }
145 }
146 break;
147 }
148 (&s[..end], &s[end..])
149 }
150
151 let mut out = Vec::new();
152 let mut rest = s;
153 while let Some((idx, prefix)) = next_url_match(rest) {
154 let before = &rest[..idx];
155 let after = &rest[idx..];
156 let url_end = url_body_end(after);
157 let raw = &after[..url_end];
158 let (url, trailing_punct) = trim_trailing(raw);
159 if prefix == "www." && !url.get(prefix.len()..).is_some_and(|body| body.contains('.')) {
165 out.push(Piece::Text(format!("{}{}", before, prefix)));
166 rest = &after[prefix.len()..];
167 continue;
168 }
169 if url.is_empty() {
170 out.push(Piece::Text(before.to_string()));
171 rest = &after[1..];
172 continue;
173 }
174 if !before.is_empty() {
175 out.push(Piece::Text(before.to_string()));
176 }
177 let href = if prefix == "www." { format!("http://{}", url) } else { url.to_string() };
178 out.push(Piece::Url(href));
179 out.push(Piece::Display(url.to_string()));
180 if !trailing_punct.is_empty() {
181 out.push(Piece::Text(trailing_punct.to_string()));
182 }
183 rest = &after[url_end..];
184 }
185 if !rest.is_empty() {
186 out.push(Piece::Text(rest.to_string()));
187 }
188 if out.is_empty() {
189 out.push(Piece::Text(String::new()));
190 }
191 out
192 }
193}
194
195impl Visitor for Apply {
196 fn visit_node(&mut self, node: &mut Node) -> NodeAction {
197 match node {
198 Node::Paragraph(p) => p.children = Self::rewrite_children(std::mem::take(&mut p.children)),
199 Node::Heading(h) => h.children = Self::rewrite_children(std::mem::take(&mut h.children)),
200 Node::Bold(i) | Node::Italic(i) | Node::Strikethrough(i) => {
201 i.children = Self::rewrite_children(std::mem::take(&mut i.children));
202 },
203 _ => {},
204 }
205 NodeAction::Keep
206 }
207}
208
/// One fragment produced when a text value is split around bare URLs.
#[derive(Debug, Clone, PartialEq, Eq)]
enum Piece {
    /// Plain text that is not part of a link.
    Text(String),
    /// Link target (href) of a detected URL.
    Url(String),
    /// Visible label for a link; the splitter emits it directly after the
    /// `Url` piece it belongs to.
    Display(String),
}