markdown_it/generics/inline/
full_link.rs

1//! Structure similar to `[link](<to> "stuff")` with configurable prefix.
2//!
3//! There are two structures in CommonMark that match this syntax:
4//!  - links - `[text](<href> "title")`
5//!  - images - `![alt](<src> "title")`
6//!
//! You can add custom rules like `~[foo](<bar> "baz")`. Let us know if
//! you come up with a fun use case to add as an example!
9//!
//! Add a custom structure by using the [add_prefix] function, which takes the following arguments:
11//!  - `PREFIX` - marker character before label (`!` in case of images)
12//!  - `ENABLE_NESTED` - allow nested links inside
13//!  - `md` - parser instance
14//!  - `f` - function that should return your custom [Node] given href and title
15//!
16use std::collections::HashMap;
17
18use crate::common::utils::unescape_all;
19use crate::parser::extset::{InlineRootExt, MarkdownItExt};
20use crate::parser::inline::{InlineRule, InlineState};
21use crate::plugins::cmark::block::reference::ReferenceMap;
22use crate::{MarkdownIt, Node};
23
24#[derive(Debug)]
25struct LinkCfg<const PREFIX: char>(fn (Option<String>, Option<String>) -> Node);
26impl<const PREFIX: char> MarkdownItExt for LinkCfg<PREFIX> {}
27
28/// adds custom rule with no prefix
29pub fn add<const ENABLE_NESTED: bool>(
30    md: &mut MarkdownIt,
31    f: fn (url: Option<String>, title: Option<String>) -> Node
32) {
33    md.ext.insert(LinkCfg::<'\0'>(f));
34    md.inline.add_rule::<LinkScanner<ENABLE_NESTED>>();
35    if !md.inline.has_rule::<LinkScannerEnd>() {
36        md.inline.add_rule::<LinkScannerEnd>();
37    }
38}
39
40/// adds custom rule with given `PREFIX` character
41pub fn add_prefix<const PREFIX: char, const ENABLE_NESTED: bool>(
42    md: &mut MarkdownIt,
43    f: fn (url: Option<String>, title: Option<String>) -> Node
44) {
45    md.ext.insert(LinkCfg::<PREFIX>(f));
46    md.inline.add_rule::<LinkPrefixScanner<PREFIX, ENABLE_NESTED>>();
47    if !md.inline.has_rule::<LinkScannerEnd>() {
48        md.inline.add_rule::<LinkScannerEnd>();
49    }
50}
51
52#[doc(hidden)]
53pub struct LinkScanner<const ENABLE_NESTED: bool>;
54impl<const ENABLE_NESTED: bool> InlineRule for LinkScanner<ENABLE_NESTED> {
55    const MARKER: char = '[';
56
57    fn check(state: &mut InlineState) -> Option<usize> {
58        let mut chars = state.src[state.pos..state.pos_max].chars();
59        if chars.next().unwrap() != '[' { return None; }
60        rule_check(state, ENABLE_NESTED, 0)
61    }
62
63    fn run(state: &mut InlineState) -> Option<(Node, usize)> {
64        let mut chars = state.src[state.pos..state.pos_max].chars();
65        if chars.next().unwrap() != '[' { return None; }
66        let f = state.md.ext.get::<LinkCfg<'\0'>>().unwrap().0;
67        rule_run(state, ENABLE_NESTED, 0, f)
68    }
69}
70
71#[doc(hidden)]
72pub struct LinkPrefixScanner<const PREFIX: char, const ENABLE_NESTED: bool>;
73impl<const PREFIX: char, const ENABLE_NESTED: bool> InlineRule for LinkPrefixScanner<PREFIX, ENABLE_NESTED> {
74    const MARKER: char = PREFIX;
75
76    fn check(state: &mut InlineState) -> Option<usize> {
77        let mut chars = state.src[state.pos..state.pos_max].chars();
78        if chars.next() != Some(PREFIX) { return None; }
79        if chars.next() != Some('[') { return None; }
80        rule_check(state, ENABLE_NESTED, 1)
81    }
82
83    fn run(state: &mut InlineState) -> Option<(Node, usize)> {
84        let mut chars = state.src[state.pos..state.pos_max].chars();
85        if chars.next() != Some(PREFIX) { return None; }
86        if chars.next() != Some('[') { return None; }
87        let f = state.md.ext.get::<LinkCfg<PREFIX>>().unwrap().0;
88        rule_run(state, ENABLE_NESTED, 1, f)
89    }
90}
91
92#[doc(hidden)]
93/// this rule makes sure that parser is stopped on "]" character,
94/// but it actually doesn't do anything
95pub struct LinkScannerEnd;
96impl InlineRule for LinkScannerEnd {
97    const MARKER: char = ']';
98
99    fn check(_: &mut InlineState) -> Option<usize> { None }
100    fn run(_: &mut InlineState) -> Option<(Node, usize)> { None }
101}
102
103fn rule_check(state: &mut InlineState, enable_nested: bool, offset: usize) -> Option<usize> {
104    if let Some(result) = parse_link(state, state.pos + offset, enable_nested) {
105        Some(result.end - state.pos)
106    } else {
107        None
108    }
109}
110
111fn rule_run(
112    state: &mut InlineState,
113    enable_nested: bool,
114    offset: usize,
115    f: fn (Option<String>, Option<String>) -> Node
116) -> Option<(Node, usize)> {
117    let start = state.pos;
118    let result = parse_link(state, state.pos + offset, enable_nested)?;
119
120    //
121    // We found the end of the link, and know for a fact it's a valid link;
122    // so all that's left to do is to call tokenizer.
123    //
124    let old_node = std::mem::replace(&mut state.node, f(result.href, result.title));
125    let max = state.pos_max;
126
127    state.link_level += 1;
128    state.pos = result.label_start;
129    state.pos_max = result.label_end;
130    state.md.inline.tokenize(state);
131    state.pos = start;
132    state.pos_max = max;
133    state.link_level -= 1;
134
135    let node = std::mem::replace(&mut state.node, old_node);
136    Some((node, result.end - state.pos))
137}
138
139#[derive(Debug, Default)]
140struct LinkLabelScanCache(HashMap<(usize, bool), Option<usize>>);
141impl InlineRootExt for LinkLabelScanCache {}
142
143
144// Parse link label
145//
146// this function assumes that first character ("[") already matches;
147// returns the end of the label
148fn parse_link_label(state: &mut InlineState, start: usize, enable_nested: bool) -> Option<usize> {
149    let cache = state.inline_ext.get_or_insert_default::<LinkLabelScanCache>();
150    if let Some(&cached) = cache.0.get(&(start, enable_nested)) {
151        return cached;
152    }
153
154    let old_pos = state.pos;
155    let mut found = false;
156    let mut label_end = None;
157    let mut level = 1;
158
159    state.pos = start + 1;
160
161    while let Some(ch) = state.src[state.pos..state.pos_max].chars().next() {
162        if ch == ']' {
163            level -= 1;
164            if level == 0 {
165                found = true;
166                break;
167            }
168        }
169
170        let prev_pos = state.pos;
171        state.md.inline.skip_token(state);
172        if ch == '[' {
173            if prev_pos == state.pos - 1 {
174                // increase level if we find text `[`, which is not a part of any token
175                level += 1;
176
177                let cache = state.inline_ext.get_or_insert_default::<LinkLabelScanCache>();
178                if let Some(&cached) = cache.0.get(&(prev_pos, enable_nested)) {
179                    // maybe cache appeared as a result of skip_token
180                    if let Some(cached_pos) = cached {
181                        state.pos = cached_pos;
182                    } else {
183                        break;
184                    }
185                }
186
187            } else if !enable_nested {
188                break;
189            }
190        }
191    }
192
193    if found {
194        label_end = Some(state.pos);
195    }
196
197    // restore old state
198    state.pos = old_pos;
199
200    let cache = state.inline_ext.get_or_insert_default::<LinkLabelScanCache>();
201    cache.0.insert((start, enable_nested), label_end);
202
203    label_end
204}
205
206
/// Result of parsing one fragment (destination or title) of a link.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParseLinkFragmentResult {
    /// end position (byte offset just past the parsed fragment)
    pub pos:   usize,
    /// number of linebreaks inside
    pub lines: usize,
    /// parsed result
    pub str:   String,
}
215
216
217/// Helper function used to parse `<href>` part of the links with optional brackets.
218pub fn parse_link_destination(str: &str, start: usize, max: usize) -> Option<ParseLinkFragmentResult> {
219    let mut chars = str[start..max].chars().peekable();
220    let mut pos = start;
221
222    if let Some('<') = chars.peek() {
223        chars.next(); // skip '<'
224        pos += 1;
225        loop {
226            match chars.next() {
227                Some('\n' | '<') | None => return None,
228                Some('>') => {
229                    return Some(ParseLinkFragmentResult {
230                        pos: pos + 1,
231                        lines: 0,
232                        str: unescape_all(&str[start + 1..pos]).into_owned(),
233                    });
234                }
235                Some('\\') => {
236                    match chars.next() {
237                        None => return None,
238                        Some(x) => pos += 1 + x.len_utf8(),
239                    }
240                }
241                Some(x) => {
242                    pos += x.len_utf8();
243                }
244            }
245        }
246    } else {
247        let mut level : u32 = 0;
248        loop {
249            match chars.next() {
250                // space + ascii control characters
251                Some('\0'..=' ' | '\x7f') | None => break,
252                Some('\\') => {
253                    match chars.next() {
254                        Some(' ') | None => break,
255                        Some(x) => pos += 1 + x.len_utf8(),
256                    }
257                }
258                Some('(') => {
259                    level += 1;
260                    if level > 32 { return None; }
261                    pos += 1;
262                }
263                Some(')') => {
264                    if level == 0 { break; }
265                    level -= 1;
266                    pos += 1;
267                }
268                Some(x) => {
269                    pos += x.len_utf8();
270                }
271            }
272        }
273
274        if level != 0 { return None; }
275
276        Some(ParseLinkFragmentResult {
277            pos,
278            lines: 0,
279            str: unescape_all(&str[start..pos]).into_owned(),
280        })
281    }
282}
283
284
285/// Helper function used to parse `"title"` part of the links (with `'title'` or `(title)` alternative syntax).
286pub fn parse_link_title(str: &str, start: usize, max: usize) -> Option<ParseLinkFragmentResult> {
287    let mut chars = str[start..max].chars();
288    let mut pos = start + 1;
289    let mut lines = 0;
290
291    let marker = match chars.next() {
292        Some('"')  => '"',
293        Some('\'') => '\'',
294        Some('(')  => ')',
295        None | Some(_) => return None,
296    };
297
298    loop {
299        match chars.next() {
300            Some(ch) if ch == marker => {
301                return Some(ParseLinkFragmentResult {
302                    pos: pos + 1,
303                    lines,
304                    str: unescape_all(&str[start + 1..pos]).into_owned(),
305                });
306            }
307            Some('(') if marker == ')' => {
308                return None;
309            }
310            Some('\n') => {
311                pos += 1;
312                lines += 1;
313            }
314            Some('\\') => {
315                match chars.next() {
316                    None => return None,
317                    Some(x) => pos += 1 + x.len_utf8(),
318                }
319            }
320            Some(x) => {
321                pos += x.len_utf8();
322            }
323            None => {
324                return None;
325            }
326        }
327    }
328}
329
/// Positions and attributes of a successfully parsed link.
struct ParseLinkResult {
    /// position of the first character of the label (just past `[`)
    pub label_start: usize,
    /// position of the `]` that closes the label
    pub label_end: usize,
    /// link destination, if one was parsed or resolved
    pub href: Option<String>,
    /// link title, if one was parsed or resolved
    pub title: Option<String>,
    /// position just past the end of the whole link
    pub end: usize,
}
337
338// Parses [link](<to> "stuff")
339//
340// this function assumes that first character ("[") already matches
341//
342fn parse_link(state: &mut InlineState, pos: usize, enable_nested: bool) -> Option<ParseLinkResult> {
343    let label_end = parse_link_label(state, pos, enable_nested)?;
344    let label_start = pos + 1;
345    let mut pos = label_end + 1;
346    let mut chars = state.src[pos..state.pos_max].chars();
347    let mut href = None;
348    let mut title = None;
349
350    if let Some('(') = chars.next() {
351        //
352        // Inline link
353        //
354
355        // [link](  <href>  "title"  )
356        //        ^^ skipping these spaces
357        pos += 1;
358        while let Some(' ' | '\t' | '\n') = chars.next() {
359            pos += 1;
360        }
361
362        // [link](  <href>  "title"  )
363        //          ^^^^^^ parsing link destination
364        if let Some(res) = parse_link_destination(&state.src, pos, state.pos_max) {
365            let href_candidate = state.md.link_formatter.normalize_link(&res.str);
366            if state.md.link_formatter.validate_link(&href_candidate).is_some() {
367                pos = res.pos;
368                href = Some(href_candidate);
369            }
370
371            // [link](  <href>  "title"  )
372            //                ^^ skipping these spaces
373            let mut chars = state.src[pos..state.pos_max].chars();
374            while let Some(' ' | '\t' | '\n') = chars.next() {
375                pos += 1;
376            }
377
378            if let Some(res) = parse_link_title(&state.src, pos, state.pos_max) {
379                title = Some(res.str);
380                pos = res.pos;
381
382                // [link](  <href>  "title"  )
383                //                         ^^ skipping these spaces
384                let mut chars = state.src[pos..state.pos_max].chars();
385                while let Some(' ' | '\t' | '\n') = chars.next() {
386                    pos += 1;
387                }
388            }
389        }
390
391        if let Some(')') = state.src[pos..state.pos_max].chars().next() {
392            return Some(ParseLinkResult {
393                label_start,
394                label_end,
395                href,
396                title,
397                end: pos + 1,
398            })
399        }
400    }
401
402    //
403    // Link reference
404    //
405    // TODO: check if I have any references?
406    pos = label_end + 1;
407    let mut maybe_label = None;
408
409    match state.src[pos..state.pos_max].chars().next() {
410        Some('[') => {
411            if let Some(x) = parse_link_label(state, pos, false) {
412                maybe_label = Some(&state.src[pos + 1..x]);
413                pos = x + 1;
414            } else {
415                pos = label_end + 1;
416            }
417        }
418        _ => pos = label_end + 1,
419    }
420
421    let references = state.root_ext.get::<ReferenceMap>()?;
422
423    // covers label === '' and label === undefined
424    // (collapsed reference link and shortcut reference link respectively)
425    let label = if matches!(maybe_label, None | Some("")) {
426        &state.src[label_start..label_end]
427    } else {
428        maybe_label.unwrap()
429    };
430
431    let (destination, title) = references.get(label)?;
432
433    Some(ParseLinkResult {
434        label_start,
435        label_end,
436        href: Some(destination.to_owned()),
437        title: title.map(|s| s.to_owned()),
438        end: pos,
439    })
440}