markdown_that/generics/inline/
full_link.rs

1//! Structure similar to `[link](<to> "stuff")` with a configurable prefix.
2//!
3//! There are two structures in CommonMark that match this syntax:
4//!  - links - `[text](<href> "title")`
5//!  - images - `![alt](<src> "title")`
6//!
7//! You can add custom rules like `~[foo](<bar> "baz")`. Let us know if
8//! you come up with a fun use case to add as an example!
9//!
10//! Add a custom structure by using [add_prefix] function, which takes the following arguments:
11//!  - `PREFIX` - marker character before label (`!` in case of images)
12//!  - `ENABLE_NESTED` - allow nested links inside
13//!  - `md` - parser instance
14//!  - `f` - function that should return your custom [Node] given href and title
15//!
16use std::collections::HashMap;
17
18use crate::common::utils::unescape_all;
19use crate::parser::extset::{InlineRootExt, MarkdownThatExt};
20use crate::parser::inline::{InlineRule, InlineState};
21use crate::plugins::cmark::block::reference::ReferenceMap;
22use crate::{MarkdownThat, Node};
23
24#[derive(Debug)]
25struct LinkCfg<const PREFIX: char>(fn(Option<String>, Option<String>) -> Node);
26impl<const PREFIX: char> MarkdownThatExt for LinkCfg<PREFIX> {}
27
28/// adds custom rule with no prefix
29pub fn add<const ENABLE_NESTED: bool>(
30    md: &mut MarkdownThat,
31    f: fn(url: Option<String>, title: Option<String>) -> Node,
32) {
33    md.ext.insert(LinkCfg::<'\0'>(f));
34    md.inline.add_rule::<LinkScanner<ENABLE_NESTED>>();
35    if !md.inline.has_rule::<LinkScannerEnd>() {
36        md.inline.add_rule::<LinkScannerEnd>();
37    }
38}
39
40/// adds custom rule with the given ` PREFIX ` character
41pub fn add_prefix<const PREFIX: char, const ENABLE_NESTED: bool>(
42    md: &mut MarkdownThat,
43    f: fn(url: Option<String>, title: Option<String>) -> Node,
44) {
45    md.ext.insert(LinkCfg::<PREFIX>(f));
46    md.inline
47        .add_rule::<LinkPrefixScanner<PREFIX, ENABLE_NESTED>>();
48    if !md.inline.has_rule::<LinkScannerEnd>() {
49        md.inline.add_rule::<LinkScannerEnd>();
50    }
51}
52
53#[doc(hidden)]
54pub struct LinkScanner<const ENABLE_NESTED: bool>;
55impl<const ENABLE_NESTED: bool> InlineRule for LinkScanner<ENABLE_NESTED> {
56    const MARKER: char = '[';
57
58    fn check(state: &mut InlineState) -> Option<usize> {
59        let mut chars = state.src[state.pos..state.pos_max].chars();
60        if chars.next().unwrap() != '[' {
61            return None;
62        }
63        rule_check(state, ENABLE_NESTED, 0)
64    }
65
66    fn run(state: &mut InlineState) -> Option<(Node, usize)> {
67        let mut chars = state.src[state.pos..state.pos_max].chars();
68        if chars.next().unwrap() != '[' {
69            return None;
70        }
71        let f = state.md.ext.get::<LinkCfg<'\0'>>().unwrap().0;
72        rule_run(state, ENABLE_NESTED, 0, f)
73    }
74}
75
76#[doc(hidden)]
77pub struct LinkPrefixScanner<const PREFIX: char, const ENABLE_NESTED: bool>;
78impl<const PREFIX: char, const ENABLE_NESTED: bool> InlineRule
79    for LinkPrefixScanner<PREFIX, ENABLE_NESTED>
80{
81    const MARKER: char = PREFIX;
82
83    fn check(state: &mut InlineState) -> Option<usize> {
84        let mut chars = state.src[state.pos..state.pos_max].chars();
85        if chars.next() != Some(PREFIX) {
86            return None;
87        }
88        if chars.next() != Some('[') {
89            return None;
90        }
91        rule_check(state, ENABLE_NESTED, 1)
92    }
93
94    fn run(state: &mut InlineState) -> Option<(Node, usize)> {
95        let mut chars = state.src[state.pos..state.pos_max].chars();
96        if chars.next() != Some(PREFIX) {
97            return None;
98        }
99        if chars.next() != Some('[') {
100            return None;
101        }
102        let f = state.md.ext.get::<LinkCfg<PREFIX>>().unwrap().0;
103        rule_run(state, ENABLE_NESTED, 1, f)
104    }
105}
106
107#[doc(hidden)]
108/// this rule makes sure that the parser is stopped on "]" character,
109/// but it actually doesn't do anything
110pub struct LinkScannerEnd;
111impl InlineRule for LinkScannerEnd {
112    const MARKER: char = ']';
113
114    fn check(_: &mut InlineState) -> Option<usize> {
115        None
116    }
117    fn run(_: &mut InlineState) -> Option<(Node, usize)> {
118        None
119    }
120}
121
122fn rule_check(state: &mut InlineState, enable_nested: bool, offset: usize) -> Option<usize> {
123    if let Some(result) = parse_link(state, state.pos + offset, enable_nested) {
124        Some(result.end - state.pos)
125    } else {
126        None
127    }
128}
129
130fn rule_run(
131    state: &mut InlineState,
132    enable_nested: bool,
133    offset: usize,
134    f: fn(Option<String>, Option<String>) -> Node,
135) -> Option<(Node, usize)> {
136    let start = state.pos;
137    let result = parse_link(state, state.pos + offset, enable_nested)?;
138
139    //
140    // We found the end of the link, and know for a fact it's a valid link;
141    // so all that's left to do is to call tokenizer.
142    //
143    let old_node = std::mem::replace(&mut state.node, f(result.href, result.title));
144    let max = state.pos_max;
145
146    state.link_level += 1;
147    state.pos = result.label_start;
148    state.pos_max = result.label_end;
149    state.md.inline.tokenize(state);
150    state.pos = start;
151    state.pos_max = max;
152    state.link_level -= 1;
153
154    let node = std::mem::replace(&mut state.node, old_node);
155    Some((node, result.end - state.pos))
156}
157
158#[derive(Debug, Default)]
159struct LinkLabelScanCache(HashMap<(usize, bool), Option<usize>>);
160impl InlineRootExt for LinkLabelScanCache {}
161
162// Parse link label
163//
164// this function assumes that the first character ("[") already matches;
165// returns the end of the label
166fn parse_link_label(state: &mut InlineState, start: usize, enable_nested: bool) -> Option<usize> {
167    let cache = state
168        .inline_ext
169        .get_or_insert_default::<LinkLabelScanCache>();
170    if let Some(&cached) = cache.0.get(&(start, enable_nested)) {
171        return cached;
172    }
173
174    let old_pos = state.pos;
175    let mut found = false;
176    let mut label_end = None;
177    let mut level = 1;
178
179    state.pos = start + 1;
180
181    while let Some(ch) = state.src[state.pos..state.pos_max].chars().next() {
182        if ch == ']' {
183            level -= 1;
184            if level == 0 {
185                found = true;
186                break;
187            }
188        }
189
190        let prev_pos = state.pos;
191        state.md.inline.skip_token(state);
192        if ch == '[' {
193            if prev_pos == state.pos - 1 {
194                // increase the level if we find text `[`, which is not a part of any token
195                level += 1;
196
197                let cache = state
198                    .inline_ext
199                    .get_or_insert_default::<LinkLabelScanCache>();
200                if let Some(&cached) = cache.0.get(&(prev_pos, enable_nested)) {
201                    // maybe cache appeared as a result of skip_token
202                    if let Some(cached_pos) = cached {
203                        state.pos = cached_pos;
204                    } else {
205                        break;
206                    }
207                }
208            } else if !enable_nested {
209                break;
210            }
211        }
212    }
213
214    if found {
215        label_end = Some(state.pos);
216    }
217
218    // restore old state
219    state.pos = old_pos;
220
221    let cache = state
222        .inline_ext
223        .get_or_insert_default::<LinkLabelScanCache>();
224    cache.0.insert((start, enable_nested), label_end);
225
226    label_end
227}
228
/// Outcome of parsing one fragment of a link (its destination or its title).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParseLinkFragmentResult {
    /// end position (byte offset just past the parsed fragment)
    pub pos: usize,
    /// number of linebreaks inside
    pub lines: usize,
    /// parsed result
    pub str: String,
}
237
238/// Helper function used to parse `<href>` part of the links with optional brackets.
239pub fn parse_link_destination(
240    str: &str,
241    start: usize,
242    max: usize,
243) -> Option<ParseLinkFragmentResult> {
244    let mut chars = str[start..max].chars().peekable();
245    let mut pos = start;
246
247    if let Some('<') = chars.peek() {
248        chars.next(); // skip '<'
249        pos += 1;
250        loop {
251            match chars.next() {
252                Some('\n' | '<') | None => return None,
253                Some('>') => {
254                    return Some(ParseLinkFragmentResult {
255                        pos: pos + 1,
256                        lines: 0,
257                        str: unescape_all(&str[start + 1..pos]).into_owned(),
258                    });
259                }
260                Some('\\') => match chars.next() {
261                    None => return None,
262                    Some(x) => pos += 1 + x.len_utf8(),
263                },
264                Some(x) => {
265                    pos += x.len_utf8();
266                }
267            }
268        }
269    } else {
270        let mut level: u32 = 0;
271        loop {
272            match chars.next() {
273                // space and ascii control characters
274                Some('\0'..=' ' | '\x7f') | None => break,
275                Some('\\') => match chars.next() {
276                    Some(' ') | None => break,
277                    Some(x) => pos += 1 + x.len_utf8(),
278                },
279                Some('(') => {
280                    level += 1;
281                    if level > 32 {
282                        return None;
283                    }
284                    pos += 1;
285                }
286                Some(')') => {
287                    if level == 0 {
288                        break;
289                    }
290                    level -= 1;
291                    pos += 1;
292                }
293                Some(x) => {
294                    pos += x.len_utf8();
295                }
296            }
297        }
298
299        if level != 0 {
300            return None;
301        }
302
303        Some(ParseLinkFragmentResult {
304            pos,
305            lines: 0,
306            str: unescape_all(&str[start..pos]).into_owned(),
307        })
308    }
309}
310
311/// Helper function used to parse `"title"` part of the links (with `'title'` or `(title)` alternative syntax).
312pub fn parse_link_title(str: &str, start: usize, max: usize) -> Option<ParseLinkFragmentResult> {
313    let mut chars = str[start..max].chars();
314    let mut pos = start + 1;
315    let mut lines = 0;
316
317    let marker = match chars.next() {
318        Some('"') => '"',
319        Some('\'') => '\'',
320        Some('(') => ')',
321        None | Some(_) => return None,
322    };
323
324    loop {
325        match chars.next() {
326            Some(ch) if ch == marker => {
327                return Some(ParseLinkFragmentResult {
328                    pos: pos + 1,
329                    lines,
330                    str: unescape_all(&str[start + 1..pos]).into_owned(),
331                });
332            }
333            Some('(') if marker == ')' => {
334                return None;
335            }
336            Some('\n') => {
337                pos += 1;
338                lines += 1;
339            }
340            Some('\\') => match chars.next() {
341                None => return None,
342                Some(x) => pos += 1 + x.len_utf8(),
343            },
344            Some(x) => {
345                pos += x.len_utf8();
346            }
347            None => {
348                return None;
349            }
350        }
351    }
352}
353
/// Everything `parse_link` learned about a link; all positions are byte
/// offsets into the inline source.
struct ParseLinkResult {
    /// position right after the opening `[` of the label
    pub label_start: usize,
    /// position of the `]` that closes the label
    pub label_end: usize,
    /// link destination, if one was found
    pub href: Option<String>,
    /// link title, if one was found
    pub title: Option<String>,
    /// position just past the last character of the whole link
    pub end: usize,
}
361
362// Parses [link](<to> "stuff")
363//
364// this function assumes that the first character ("[") already matches
365//
366fn parse_link(state: &mut InlineState, pos: usize, enable_nested: bool) -> Option<ParseLinkResult> {
367    let label_end = parse_link_label(state, pos, enable_nested)?;
368    let label_start = pos + 1;
369    let mut pos = label_end + 1;
370    let mut chars = state.src[pos..state.pos_max].chars();
371    let mut href = None;
372    let mut title = None;
373
374    if let Some('(') = chars.next() {
375        //
376        // Inline link
377        //
378
379        // [link](  <href>  "title"  )
380        //        ^^ skipping these spaces
381        pos += 1;
382        while let Some(' ' | '\t' | '\n') = chars.next() {
383            pos += 1;
384        }
385
386        // [link](  <href>  "title"  )
387        //          ^^^^^^ parsing link destination
388        if let Some(res) = parse_link_destination(&state.src, pos, state.pos_max) {
389            let href_candidate = state.md.link_formatter.normalize_link(&res.str);
390            if state
391                .md
392                .link_formatter
393                .validate_link(&href_candidate)
394                .is_some()
395            {
396                pos = res.pos;
397                href = Some(href_candidate);
398            }
399
400            // [link](  <href>  "title"  )
401            //                ^^ skipping these spaces
402            let mut chars = state.src[pos..state.pos_max].chars();
403            while let Some(' ' | '\t' | '\n') = chars.next() {
404                pos += 1;
405            }
406
407            if let Some(res) = parse_link_title(&state.src, pos, state.pos_max) {
408                title = Some(res.str);
409                pos = res.pos;
410
411                // [link](  <href>  "title"  )
412                //                         ^^ skipping these spaces
413                let mut chars = state.src[pos..state.pos_max].chars();
414                while let Some(' ' | '\t' | '\n') = chars.next() {
415                    pos += 1;
416                }
417            }
418        }
419
420        if let Some(')') = state.src[pos..state.pos_max].chars().next() {
421            return Some(ParseLinkResult {
422                label_start,
423                label_end,
424                href,
425                title,
426                end: pos + 1,
427            });
428        }
429    }
430
431    //
432    // Link reference
433    //
434    // TODO: check if I have any references?
435    pos = label_end + 1;
436    let mut maybe_label = None;
437
438    match state.src[pos..state.pos_max].chars().next() {
439        Some('[') => {
440            if let Some(x) = parse_link_label(state, pos, false) {
441                maybe_label = Some(&state.src[pos + 1..x]);
442                pos = x + 1;
443            } else {
444                pos = label_end + 1;
445            }
446        }
447        _ => pos = label_end + 1,
448    }
449
450    let references = state.root_ext.get::<ReferenceMap>()?;
451
452    // covers label === '' and label === undefined
453    // (collapsed reference link and shortcut reference link respectively)
454    let label = if matches!(maybe_label, None | Some("")) {
455        &state.src[label_start..label_end]
456    } else {
457        maybe_label.unwrap()
458    };
459
460    let (destination, title) = references.get(label)?;
461
462    Some(ParseLinkResult {
463        label_start,
464        label_end,
465        href: Some(destination.to_owned()),
466        title: title.map(|s| s.to_owned()),
467        end: pos,
468    })
469}