markdown_that/generics/inline/
code_pair.rs

1//! Structure similar to `` `code span` `` with configurable markers of variable length.
2//!
3//! It allows you to define a custom structure with a variable number of markers
4//! (e.g., with `%` defined as a marker, the user can write `%foo%` or `%%%foo%%%`
5//! resulting in the same node).
6//!
//! You add a custom structure by using the [add_with] function, which takes the following arguments:
8//!  - `MARKER` - marker character
9//!  - `md` - parser instance
10//!  - `f` - function that should return your custom [Node]
11//!
12//! Here is an example of a rule turning `%foo%` into `🦀foo🦀`:
13//!
14//! ```rust
15//! use markdown_that::generics::inline::code_pair;
16//! use markdown_that::{MarkdownThat, Node, NodeValue, Renderer};
17//!
18//! #[derive(Debug)]
19//! struct Ferris;
20//! impl NodeValue for Ferris {
21//!     fn render(&self, node: &Node, fmt: &mut dyn Renderer) {
22//!         fmt.text("🦀");
23//!         fmt.contents(&node.children);
24//!         fmt.text("🦀");
25//!     }
26//! }
27//!
28//! let md = &mut MarkdownThat::new();
29//! code_pair::add_with::<'%'>(md, |_| Node::new(Ferris));
30//! let html = md.parse("hello %world%").render();
31//! assert_eq!(html.trim(), "hello 🦀world🦀");
32//! ```
33//!
34//! This generic structure follows the exact rules of code span in CommonMark:
35//!
36//! 1. Literal marker character sequence can be used inside a structure if its length
37//! doesn't match the length of the opening/closing sequence (e.g., with `%` defined
38//! as a marker, `%%foo%bar%%` gets parsed as `Node("foo%bar")`).
39//!
//! 2. A single leading and a single trailing space inside the structure are trimmed,
//! which allows you to write `% %%foo %` and have it parsed as `Node("%%foo")`.
42//!
43//! If you define two structures with the same marker, only the first one will work.
44//!
45use crate::parser::extset::{InlineRootExt, MarkdownThatExt};
46use crate::parser::inline::{InlineRule, InlineState, Text};
47use crate::{MarkdownThat, Node};
48
49#[derive(Debug, Default)]
50struct CodePairCache<const MARKER: char> {
51    scanned: bool,
52    max: Vec<usize>,
53}
54impl<const MARKER: char> InlineRootExt for CodePairCache<MARKER> {}
55
56#[derive(Debug)]
57struct CodePairConfig<const MARKER: char>(fn(usize) -> Node);
58impl<const MARKER: char> MarkdownThatExt for CodePairConfig<MARKER> {}
59
60pub fn add_with<const MARKER: char>(md: &mut MarkdownThat, f: fn(length: usize) -> Node) {
61    md.ext.insert(CodePairConfig::<MARKER>(f));
62
63    md.inline.add_rule::<CodePairScanner<MARKER>>();
64}
65
66#[doc(hidden)]
67pub struct CodePairScanner<const MARKER: char>;
68impl<const MARKER: char> InlineRule for CodePairScanner<MARKER> {
69    const MARKER: char = MARKER;
70
71    fn run(state: &mut InlineState) -> Option<(Node, usize)> {
72        let mut chars = state.src[state.pos..state.pos_max].chars();
73        if chars.next().unwrap() != MARKER {
74            return None;
75        }
76        if state.trailing_text_get().ends_with(MARKER) {
77            return None;
78        }
79
80        let mut pos = state.pos + 1;
81
82        // scan marker length
83        while Some(MARKER) == chars.next() {
84            pos += 1;
85        }
86
87        // backtick length => last seen position
88        let backticks = state
89            .inline_ext
90            .get_or_insert_default::<CodePairCache<MARKER>>();
91        let opener_len = pos - state.pos;
92
93        if backticks.scanned && backticks.max.get(opener_len).copied().unwrap_or(0) <= state.pos {
94            // performance note: adding an entire sequence into pending is 5x faster,
95            // but it will interfere with other rules working on the same char;
96            // and it is extremely rare that a user would put a thousand "`" in text
97            return None;
98        }
99
100        let mut match_start;
101        let mut match_end = pos;
102
103        // Nothing found in the cache, scan until the end of the line (or until the marker is found)
104        while let Some(p) = state.src[match_end..state.pos_max].find(MARKER) {
105            match_start = match_end + p;
106
107            // scan marker length
108            match_end = match_start + 1;
109            chars = state.src[match_end..state.pos_max].chars();
110
111            while Some(MARKER) == chars.next() {
112                match_end += 1;
113            }
114
115            let closer_len = match_end - match_start;
116
117            if closer_len == opener_len {
118                // Found matching closer length.
119                let mut content = state.src[pos..match_start].to_owned().replace('\n', " ");
120                if content.starts_with(' ') && content.ends_with(' ') && content.len() > 2 {
121                    content[1..content.len() - 1]
122                        .to_owned()
123                        .clone_into(&mut content);
124                    pos += 1;
125                    match_start -= 1;
126                }
127
128                let f = state.md.ext.get::<CodePairConfig<MARKER>>().unwrap().0;
129                let mut node = f(opener_len);
130
131                let mut inner_node = Node::new(Text { content });
132                inner_node.srcmap = state.get_map(pos, match_start);
133                node.children.push(inner_node);
134
135                return Some((node, match_end - state.pos));
136            }
137
138            // Some different length found, put it in cache as upper limit of where closer can be found
139            let backticks = state.inline_ext.get_mut::<CodePairCache<MARKER>>().unwrap();
140            while backticks.max.len() <= closer_len {
141                backticks.max.push(0);
142            }
143            backticks.max[closer_len] = match_start;
144        }
145
146        // Scanned through the end, didn't find anything
147        let backticks = state.inline_ext.get_mut::<CodePairCache<MARKER>>().unwrap();
148        backticks.scanned = true;
149
150        None
151    }
152}