Skip to main content

markdown_it/plugins/cmark/block/
reference.rs

1//! Link reference definition
2//!
3//! `[label]: /url "title"`
4//!
5//! <https://spec.commonmark.org/0.30/#link-reference-definition>
6//!
7//! This plugin parses markdown link references. Check documentation on [ReferenceMap]
8//! to see how you can use and/or extend it if you have external source for references.
9//!
10use std::collections::HashMap;
11use std::fmt::Debug;
12
13use derive_more::{Deref, DerefMut};
14use downcast_rs::{impl_downcast, Downcast};
15
16use crate::common::utils::normalize_reference;
17use crate::generics::inline::full_link;
18use crate::parser::block::{BlockRule, BlockState};
19use crate::parser::extset::RootExt;
20use crate::{MarkdownIt, Node, NodeValue};
21
22/// Storage for parsed references
23///
24/// if you have some external source for your link references, you can add them like this:
25///
26/// ```rust
27/// use markdown_it::parser::block::builtin::BlockParserRule;
28/// use markdown_it::parser::core::{CoreRule, Root};
29/// use markdown_it::plugins::cmark::block::reference::{ReferenceMap, DefaultReferenceMap, CustomReferenceMap};
30/// use markdown_it::{MarkdownIt, Node};
31///
32/// let md = &mut MarkdownIt::new();
33/// markdown_it::plugins::cmark::add(md);
34///
35/// #[derive(Debug, Default)]
36/// struct RefMapOverride(DefaultReferenceMap);
37/// impl CustomReferenceMap for RefMapOverride {
38///     fn get(&self, label: &str) -> Option<(&str, Option<&str>)> {
39///         // override a specific link
40///         if label == "rust" {
41///             return Some((
42///                 "https://www.rust-lang.org/",
43///                 Some("The Rust Language"),
44///             ));
45///         }
46///
47///         self.0.get(label)
48///     }
49///
50///     fn insert(&mut self, label: String, destination: String, title: Option<String>) -> bool {
51///         self.0.insert(label, destination, title)
52///     }
53/// }
54///
55/// struct AddCustomReferences;
56/// impl CoreRule for AddCustomReferences {
57///     fn run(root: &mut Node, _: &MarkdownIt) {
58///         let data = root.cast_mut::<Root>().unwrap();
59///         data.ext.insert(ReferenceMap::new(RefMapOverride::default()));
60///     }
61/// }
62///
63/// md.add_rule::<AddCustomReferences>()
64///     .before::<BlockParserRule>();
65///
66/// let html = md.parse("[rust]").render();
67/// assert_eq!(
68///     html.trim(),
69///     r#"<p><a href="https://www.rust-lang.org/" title="The Rust Language">rust</a></p>"#
70/// );
71/// ```
72///
73/// You can also view all references that user created by adding the following rule:
74///
75/// ```rust
76/// use markdown_it::parser::core::{CoreRule, Root};
77/// use markdown_it::plugins::cmark::block::reference::{ReferenceMap, DefaultReferenceMap};
78/// use markdown_it::{MarkdownIt, Node};
79///
80/// let md = &mut MarkdownIt::new();
81/// markdown_it::plugins::cmark::add(md);
82///
83/// let ast = md.parse("[hello]: world");
84/// let root = ast.node_value.downcast_ref::<Root>().unwrap();
85/// let refmap = root.ext.get::<ReferenceMap>()
86///     .map(|m| m.downcast_ref::<DefaultReferenceMap>().expect("expect references to be handled by default map"));
87///
88/// let mut labels = vec![];
89/// if let Some(refmap) = refmap {
90///     for (label, _dest, _title) in refmap.iter() {
91///         labels.push(label);
92///     }
93/// }
94///
95/// assert_eq!(labels, ["hello"]);
96/// ```
97///
98#[derive(Debug, Deref, DerefMut)]
99#[deref(forward)]
100#[deref_mut(forward)]
101pub struct ReferenceMap(Box<dyn CustomReferenceMap>);
102
103impl ReferenceMap {
104    pub fn new(custom_map: impl CustomReferenceMap + 'static) -> Self {
105        Self(Box::new(custom_map))
106    }
107}
108
109impl Default for ReferenceMap {
110    fn default() -> Self {
111        Self::new(DefaultReferenceMap::new())
112    }
113}
114
115impl RootExt for ReferenceMap {}
116
117pub trait CustomReferenceMap: Debug + Downcast + Send + Sync {
118    /// Insert new element to the reference map. You may return false if it's not a valid label to stop parsing.
119    fn insert(&mut self, label: String, destination: String, title: Option<String>) -> bool;
120
121    /// Get an element referenced by `label` from the map, returns destination and optional title.
122    fn get(&self, label: &str) -> Option<(&str, Option<&str>)>;
123}
124
125impl_downcast!(CustomReferenceMap);
126
127#[derive(Default, Debug)]
128pub struct DefaultReferenceMap(HashMap<ReferenceMapKey, ReferenceMapEntry>);
129
130impl DefaultReferenceMap {
131    pub fn new() -> Self {
132        Self::default()
133    }
134
135    pub fn iter(&self) -> impl Iterator<Item = (&str, &str, Option<&str>)> {
136        Box::new(
137            self.0
138                .iter()
139                .map(|(a, b)| (a.label.as_str(), b.destination.as_str(), b.title.as_deref())),
140        )
141    }
142}
143
144impl CustomReferenceMap for DefaultReferenceMap {
145    fn insert(&mut self, label: String, destination: String, title: Option<String>) -> bool {
146        let Some(key) = ReferenceMapKey::new(label) else {
147            return false;
148        };
149        self.0
150            .entry(key)
151            .or_insert(ReferenceMapEntry::new(destination, title));
152        true
153    }
154
155    fn get(&self, label: &str) -> Option<(&str, Option<&str>)> {
156        let key = ReferenceMapKey::new(label.to_owned())?;
157        self.0
158            .get(&key)
159            .map(|r| (r.destination.as_str(), r.title.as_deref()))
160    }
161}
162
/// Reference label
///
/// Holds the label text next to its normalized form. Equality and hashing
/// look at the normalized form only.
#[derive(Default, Debug)]
struct ReferenceMapKey {
    /// Label text as handed to the constructor.
    pub label: String,
    /// Normalized label; the sole input to `eq` and `hash`.
    normalized: String,
}

// Two keys are the same key whenever their normalized labels match,
// regardless of how `label` itself is spelled.
impl PartialEq for ReferenceMapKey {
    fn eq(&self, rhs: &Self) -> bool {
        self.normalized.as_str() == rhs.normalized.as_str()
    }
}

impl Eq for ReferenceMapKey {}

// Hash exactly what `eq` compares so that equal keys hash equally.
impl std::hash::Hash for ReferenceMapKey {
    fn hash<H: std::hash::Hasher>(&self, hasher: &mut H) {
        std::hash::Hash::hash(&self.normalized, hasher);
    }
}
183
184impl ReferenceMapKey {
185    pub fn new(label: String) -> Option<Self> {
186        let normalized = normalize_reference(&label);
187
188        if normalized.is_empty() {
189            // CommonMark 0.20 disallows empty labels
190            return None;
191        }
192
193        Some(Self { label, normalized })
194    }
195}
196
/// Reference value
///
/// What a label resolves to: a destination and, optionally, a title.
#[derive(Default, Debug)]
struct ReferenceMapEntry {
    /// Link destination.
    pub destination: String,
    /// Link title, if the definition had one.
    pub title: Option<String>,
}

impl ReferenceMapEntry {
    /// Bundles a destination and an optional title into an entry.
    pub fn new(destination: String, title: Option<String>) -> Self {
        ReferenceMapEntry { destination, title }
    }
}
209
210/// Add plugin that parses markdown link references
211pub fn add(md: &mut MarkdownIt) {
212    md.block.add_rule::<ReferenceScanner>();
213}
214
215#[derive(Debug)]
216pub struct Definition {
217    pub label: String,
218    pub destination: String,
219    pub title: Option<String>,
220}
221impl NodeValue for Definition {
222    fn render(&self, _: &Node, _: &mut dyn crate::Renderer) {}
223}
224
225#[doc(hidden)]
226pub struct ReferenceScanner;
227impl BlockRule for ReferenceScanner {
228    fn check(_: &mut BlockState) -> Option<()> {
229        None // can't interrupt anything
230    }
231
232    fn run(state: &mut BlockState) -> Option<(Node, usize)> {
233        if state.line_indent(state.line) >= state.md.max_indent {
234            return None;
235        }
236
237        let mut chars = state.get_line(state.line).chars();
238
239        let Some('[') = chars.next() else {
240            return None;
241        };
242
243        // Simple check to quickly interrupt scan on [link](url) at the start of line.
244        // Can be useful on practice: https://github.com/markdown-it/markdown-it/issues/54
245        loop {
246            match chars.next() {
247                Some('\\') => {
248                    chars.next();
249                }
250                Some(']') => {
251                    if let Some(':') = chars.next() {
252                        break;
253                    } else {
254                        return None;
255                    }
256                }
257                Some(_) => {}
258                None => break,
259            }
260        }
261
262        let start_line = state.line;
263        let mut next_line = start_line;
264
265        // jump line-by-line until empty one or EOF
266        'outer: loop {
267            next_line += 1;
268
269            if next_line >= state.line_max || state.is_empty(next_line) {
270                break;
271            }
272
273            // this may be a code block normally, but after paragraph
274            // it's considered a lazy continuation regardless of what's there
275            if state.line_indent(next_line) >= state.md.max_indent {
276                continue;
277            }
278
279            // quirk for blockquotes, this line should already be checked by that rule
280            if state.line_offsets[next_line].indent_nonspace < 0 {
281                continue;
282            }
283
284            // Some tags can terminate paragraph without empty line.
285            let old_state_line = state.line;
286            state.line = next_line;
287            if state.test_rules_at_line() {
288                state.line = old_state_line;
289                break 'outer;
290            }
291            state.line = old_state_line;
292        }
293
294        let (str_before_trim, _) = state.get_lines(start_line, next_line, state.blk_indent, false);
295        let str = str_before_trim.trim();
296        let mut chars = str.char_indices();
297        chars.next(); // skip '['
298        let label_end;
299        let mut lines = 0;
300
301        loop {
302            match chars.next() {
303                Some((_, '[')) => return None,
304                Some((p, ']')) => {
305                    label_end = p;
306                    break;
307                }
308                Some((_, '\n')) => lines += 1,
309                Some((_, '\\')) => {
310                    if let Some((_, '\n')) = chars.next() {
311                        lines += 1;
312                    }
313                }
314                Some(_) => {}
315                None => return None,
316            }
317        }
318
319        let Some((_, ':')) = chars.next() else {
320            return None;
321        };
322
323        // [label]:   destination   'title'
324        //         ^^^ skip optional whitespace here
325        let mut pos = label_end + 2;
326        while let Some((_, ch @ (' ' | '\t' | '\n'))) = chars.next() {
327            if ch == '\n' {
328                lines += 1;
329            }
330            pos += 1;
331        }
332
333        // [label]:   destination   'title'
334        //            ^^^^^^^^^^^ parse this
335        let href;
336        if let Some(res) = full_link::parse_link_destination(str, pos, str.len()) {
337            if pos == res.pos {
338                return None;
339            }
340            href = state.md.link_formatter.normalize_link(&res.str);
341            state.md.link_formatter.validate_link(&href)?;
342            pos = res.pos;
343            lines += res.lines;
344        } else {
345            return None;
346        }
347
348        // save cursor state, we could require to rollback later
349        let dest_end_pos = pos;
350        let dest_end_lines = lines;
351
352        // [label]:   destination   'title'
353        //                       ^^^ skipping those spaces
354        let start = pos;
355        let mut chars = str[pos..].chars();
356        while let Some(ch @ (' ' | '\t' | '\n')) = chars.next() {
357            if ch == '\n' {
358                lines += 1;
359            }
360            pos += 1;
361        }
362
363        // [label]:   destination   'title'
364        //                          ^^^^^^^ parse this
365        let mut title = None;
366        if pos != start {
367            if let Some(res) = full_link::parse_link_title(str, pos, str.len()) {
368                title = Some(res.str);
369                pos = res.pos;
370                lines += res.lines;
371            } else {
372                pos = dest_end_pos;
373                lines = dest_end_lines;
374            }
375        }
376
377        // skip trailing spaces until the rest of the line
378        let mut chars = str[pos..].chars();
379        loop {
380            match chars.next() {
381                Some(' ' | '\t') => {} // pos no longer used
382                Some('\n') | None => break,
383                Some(_) if title.is_some() => {
384                    // garbage at the end of the line after title,
385                    // but it could still be a valid reference if we roll back
386                    title = None;
387                    pos = dest_end_pos;
388                    lines = dest_end_lines;
389                    chars = str[pos..].chars();
390                }
391                Some(_) => {
392                    // garbage at the end of the line
393                    return None;
394                }
395            }
396        }
397
398        let references = state.root_ext.get_or_insert_default::<ReferenceMap>();
399        if !references.insert(str[1..label_end].to_owned(), href.clone(), title.clone()) {
400            return None;
401        }
402
403        Some((
404            Node::new(Definition {
405                label: str[1..label_end].to_owned(),
406                destination: href,
407                title,
408            }),
409            lines + 1,
410        ))
411    }
412}