markdown_it/plugins/cmark/block/
reference.rs

1//! Link reference definition
2//!
3//! `[label]: /url "title"`
4//!
5//! <https://spec.commonmark.org/0.30/#link-reference-definition>
6//!
//! This plugin parses markdown link reference definitions. Check the documentation on [ReferenceMap]
//! to see how you can use and/or extend it if you have an external source for references.
9//!
10use derivative::Derivative;
11use derive_more::{Deref, DerefMut};
12use downcast_rs::{impl_downcast, Downcast};
13use std::collections::HashMap;
14use std::fmt::Debug;
15
16use crate::common::utils::normalize_reference;
17use crate::generics::inline::full_link;
18use crate::parser::block::{BlockRule, BlockState};
19use crate::parser::extset::RootExt;
20use crate::{MarkdownIt, Node, NodeValue};
21
22/// Storage for parsed references
23///
/// If you have an external source for your link references, you can add them like this:
25///
26/// ```rust
27/// use markdown_it::parser::block::builtin::BlockParserRule;
28/// use markdown_it::parser::core::{CoreRule, Root};
29/// use markdown_it::plugins::cmark::block::reference::{ReferenceMap, DefaultReferenceMap, CustomReferenceMap};
30/// use markdown_it::{MarkdownIt, Node};
31///
32/// let md = &mut MarkdownIt::new();
33/// markdown_it::plugins::cmark::add(md);
34///
35/// #[derive(Debug, Default)]
36/// struct RefMapOverride(DefaultReferenceMap);
37/// impl CustomReferenceMap for RefMapOverride {
38///     fn get(&self, label: &str) -> Option<(&str, Option<&str>)> {
39///         // override a specific link
40///         if label == "rust" {
41///             return Some((
42///                 "https://www.rust-lang.org/",
43///                 Some("The Rust Language"),
44///             ));
45///         }
46///
47///         self.0.get(label)
48///     }
49///
50///     fn insert(&mut self, label: String, destination: String, title: Option<String>) -> bool {
51///         self.0.insert(label, destination, title)
52///     }
53/// }
54///
55/// struct AddCustomReferences;
56/// impl CoreRule for AddCustomReferences {
57///     fn run(root: &mut Node, _: &MarkdownIt) {
58///         let data = root.cast_mut::<Root>().unwrap();
59///         data.ext.insert(ReferenceMap::new(RefMapOverride::default()));
60///     }
61/// }
62///
63/// md.add_rule::<AddCustomReferences>()
64///     .before::<BlockParserRule>();
65///
66/// let html = md.parse("[rust]").render();
67/// assert_eq!(
68///     html.trim(),
69///     r#"<p><a href="https://www.rust-lang.org/" title="The Rust Language">rust</a></p>"#
70/// );
71/// ```
72///
/// You can also inspect all references the user defined by reading the map back from the parsed root:
74///
75/// ```rust
76/// use markdown_it::parser::core::{CoreRule, Root};
77/// use markdown_it::plugins::cmark::block::reference::{ReferenceMap, DefaultReferenceMap};
78/// use markdown_it::{MarkdownIt, Node};
79///
80/// let md = &mut MarkdownIt::new();
81/// markdown_it::plugins::cmark::add(md);
82///
83/// let ast = md.parse("[hello]: world");
84/// let root = ast.node_value.downcast_ref::<Root>().unwrap();
85/// let refmap = root.ext.get::<ReferenceMap>()
86///     .map(|m| m.downcast_ref::<DefaultReferenceMap>().expect("expect references to be handled by default map"));
87///
88/// let mut labels = vec![];
89/// if let Some(refmap) = refmap {
90///     for (label, _dest, _title) in refmap.iter() {
91///         labels.push(label);
92///     }
93/// }
94///
95/// assert_eq!(labels, ["hello"]);
96/// ```
97///
98#[derive(Debug, Deref, DerefMut)]
99#[deref(forward)]
100#[deref_mut(forward)]
101pub struct ReferenceMap(Box<dyn CustomReferenceMap>);
102
103impl ReferenceMap {
104    pub fn new(custom_map: impl CustomReferenceMap + 'static) -> Self {
105        Self(Box::new(custom_map))
106    }
107}
108
109impl Default for ReferenceMap {
110    fn default() -> Self {
111        Self::new(DefaultReferenceMap::new())
112    }
113}
114
115impl RootExt for ReferenceMap {}
116
117pub trait CustomReferenceMap : Debug + Downcast + Send + Sync {
118    /// Insert new element to the reference map. You may return false if it's not a valid label to stop parsing.
119    fn insert(&mut self, label: String, destination: String, title: Option<String>) -> bool;
120
121    /// Get an element referenced by `label` from the map, returns destination and optional title.
122    fn get(&self, label: &str) -> Option<(&str, Option<&str>)>;
123}
124
125impl_downcast!(CustomReferenceMap);
126
127#[derive(Default, Debug)]
128pub struct DefaultReferenceMap(HashMap<ReferenceMapKey, ReferenceMapEntry>);
129
130impl DefaultReferenceMap {
131    pub fn new() -> Self {
132        Self::default()
133    }
134
135    pub fn iter(&self) -> impl Iterator<Item = (&str, &str, Option<&str>)> {
136        Box::new(self.0.iter().map(|(a, b)| {
137            (a.label.as_str(), b.destination.as_str(), b.title.as_deref())
138        }))
139    }
140}
141
142impl CustomReferenceMap for DefaultReferenceMap {
143    fn insert(&mut self, label: String, destination: String, title: Option<String>) -> bool {
144        let Some(key) = ReferenceMapKey::new(label) else { return false; };
145        self.0.entry(key)
146            .or_insert(ReferenceMapEntry::new(destination, title));
147        true
148    }
149
150    fn get(&self, label: &str) -> Option<(&str, Option<&str>)> {
151        let key = ReferenceMapKey::new(label.to_owned())?;
152        self.0.get(&key)
153            .map(|r| (r.destination.as_str(), r.title.as_deref()))
154    }
155}
156
157#[derive(Derivative)]
158#[derivative(Debug, Default, Hash, PartialEq, Eq)]
159/// Reference label
160struct ReferenceMapKey {
161    #[derivative(PartialEq = "ignore")]
162    #[derivative(Hash = "ignore")]
163    pub label: String,
164    normalized: String,
165}
166
167impl ReferenceMapKey {
168    pub fn new(label: String) -> Option<Self> {
169        let normalized = normalize_reference(&label);
170
171        if normalized.is_empty() {
172            // CommonMark 0.20 disallows empty labels
173            return None;
174        }
175
176        Some(Self { label, normalized })
177    }
178}
179
/// Reference value
#[derive(Debug, Default)]
struct ReferenceMapEntry {
    /// Link destination the reference points to.
    pub destination: String,
    /// Optional link title.
    pub title: Option<String>,
}

impl ReferenceMapEntry {
    /// Bundles a destination and an optional title into an entry.
    pub fn new(destination: String, title: Option<String>) -> Self {
        ReferenceMapEntry { destination, title }
    }
}
192
193/// Add plugin that parses markdown link references
194pub fn add(md: &mut MarkdownIt) {
195    md.block.add_rule::<ReferenceScanner>();
196}
197
198#[derive(Debug)]
199pub struct Definition {
200    pub label: String,
201    pub destination: String,
202    pub title: Option<String>,
203}
204impl NodeValue for Definition {
205    fn render(&self, _: &Node, _: &mut dyn crate::Renderer) {}
206}
207
208#[doc(hidden)]
209pub struct ReferenceScanner;
210impl BlockRule for ReferenceScanner {
211    fn check(_: &mut BlockState) -> Option<()> {
212        None // can't interrupt anything
213    }
214
215    fn run(state: &mut BlockState) -> Option<(Node, usize)> {
216
217        if state.line_indent(state.line) >= state.md.max_indent { return None; }
218
219        let mut chars = state.get_line(state.line).chars();
220
221        let Some('[') = chars.next() else { return None; };
222
223        // Simple check to quickly interrupt scan on [link](url) at the start of line.
224        // Can be useful on practice: https://github.com/markdown-it/markdown-it/issues/54
225        loop {
226            match chars.next() {
227                Some('\\') => { chars.next(); },
228                Some(']') => {
229                    if let Some(':') = chars.next() {
230                        break;
231                    } else {
232                        return None;
233                    }
234                }
235                Some(_) => {},
236                None => break,
237            }
238        }
239
240        let start_line = state.line;
241        let mut next_line = start_line;
242
243        // jump line-by-line until empty one or EOF
244        'outer: loop {
245            next_line += 1;
246
247            if next_line >= state.line_max || state.is_empty(next_line) { break; }
248
249            // this may be a code block normally, but after paragraph
250            // it's considered a lazy continuation regardless of what's there
251            if state.line_indent(next_line) >= state.md.max_indent { continue; }
252
253            // quirk for blockquotes, this line should already be checked by that rule
254            if state.line_offsets[next_line].indent_nonspace < 0 { continue; }
255
256            // Some tags can terminate paragraph without empty line.
257            let old_state_line = state.line;
258            state.line = next_line;
259            if state.test_rules_at_line() {
260                state.line = old_state_line;
261                break 'outer;
262            }
263            state.line = old_state_line;
264        }
265
266        let (str_before_trim, _) = state.get_lines(start_line, next_line, state.blk_indent, false);
267        let str = str_before_trim.trim();
268        let mut chars = str.char_indices();
269        chars.next(); // skip '['
270        let label_end;
271        let mut lines = 0;
272
273        loop {
274            match chars.next() {
275                Some((_, '[')) => return None,
276                Some((p, ']')) => {
277                    label_end = p;
278                    break;
279                }
280                Some((_, '\n')) => lines += 1,
281                Some((_, '\\')) => {
282                    if let Some((_, '\n')) = chars.next() {
283                        lines += 1;
284                    }
285                }
286                Some(_) => {},
287                None => return None,
288            }
289        }
290
291        let Some((_, ':')) = chars.next() else { return None; };
292
293        // [label]:   destination   'title'
294        //         ^^^ skip optional whitespace here
295        let mut pos = label_end + 2;
296        while let Some((_, ch @ (' ' | '\t' | '\n'))) = chars.next() {
297            if ch == '\n' { lines += 1; }
298            pos += 1;
299        }
300
301        // [label]:   destination   'title'
302        //            ^^^^^^^^^^^ parse this
303        let href;
304        if let Some(res) = full_link::parse_link_destination(str, pos, str.len()) {
305            if pos == res.pos { return None; }
306            href = state.md.link_formatter.normalize_link(&res.str);
307            state.md.link_formatter.validate_link(&href)?;
308            pos = res.pos;
309            lines += res.lines;
310        } else {
311            return None;
312        }
313
314        // save cursor state, we could require to rollback later
315        let dest_end_pos = pos;
316        let dest_end_lines = lines;
317
318        // [label]:   destination   'title'
319        //                       ^^^ skipping those spaces
320        let start = pos;
321        let mut chars = str[pos..].chars();
322        while let Some(ch @ (' ' | '\t' | '\n')) = chars.next() {
323            if ch == '\n' { lines += 1; }
324            pos += 1;
325        }
326
327        // [label]:   destination   'title'
328        //                          ^^^^^^^ parse this
329        let mut title = None;
330        if pos != start {
331            if let Some(res) = full_link::parse_link_title(str, pos, str.len()) {
332                title = Some(res.str);
333                pos = res.pos;
334                lines += res.lines;
335            } else {
336                pos = dest_end_pos;
337                lines = dest_end_lines;
338            }
339        }
340
341        // skip trailing spaces until the rest of the line
342        let mut chars = str[pos..].chars();
343        loop {
344            match chars.next() {
345                Some(' ' | '\t') => pos += 1,
346                Some('\n') | None => break,
347                Some(_) if title.is_some() => {
348                    // garbage at the end of the line after title,
349                    // but it could still be a valid reference if we roll back
350                    title = None;
351                    pos = dest_end_pos;
352                    lines = dest_end_lines;
353                    chars = str[pos..].chars();
354                }
355                Some(_) => {
356                    // garbage at the end of the line
357                    return None;
358                }
359            }
360        }
361
362        let references = state.root_ext.get_or_insert_default::<ReferenceMap>();
363        if !references.insert(str[1..label_end].to_owned(), href.clone(), title.clone()) { return None; }
364
365        Some((Node::new(
366            Definition { 
367                label: str[1..label_end].to_owned(), 
368                destination: href, 
369                title
370            }), 
371            lines + 1
372        ))
373    }
374}