markdown_that/generics/inline/code_pair.rs
1//! Structure similar to `` `code span` `` with configurable markers of variable length.
2//!
3//! It allows you to define a custom structure with a variable number of markers
4//! (e.g., with `%` defined as a marker, the user can write `%foo%` or `%%%foo%%%`
5//! resulting in the same node).
6//!
7//! You add a custom structure by using [add_with] function, which takes the following arguments:
8//! - `MARKER` - marker character
9//! - `md` - parser instance
10//! - `f` - function that should return your custom [Node]
11//!
12//! Here is an example of a rule turning `%foo%` into `🦀foo🦀`:
13//!
14//! ```rust
15//! use markdown_that::generics::inline::code_pair;
16//! use markdown_that::{MarkdownThat, Node, NodeValue, Renderer};
17//!
18//! #[derive(Debug)]
19//! struct Ferris;
20//! impl NodeValue for Ferris {
21//! fn render(&self, node: &Node, fmt: &mut dyn Renderer) {
22//! fmt.text("🦀");
23//! fmt.contents(&node.children);
24//! fmt.text("🦀");
25//! }
26//! }
27//!
28//! let md = &mut MarkdownThat::new();
29//! code_pair::add_with::<'%'>(md, |_| Node::new(Ferris));
30//! let html = md.parse("hello %world%").render();
31//! assert_eq!(html.trim(), "hello 🦀world🦀");
32//! ```
33//!
34//! This generic structure follows the exact rules of code span in CommonMark:
35//!
36//! 1. Literal marker character sequence can be used inside a structure if its length
37//! doesn't match the length of the opening/closing sequence (e.g., with `%` defined
38//! as a marker, `%%foo%bar%%` gets parsed as `Node("foo%bar")`).
39//!
40//! 2. Single space inside is trimmed to allow you to write `% %%foo %` to be parsed as
41//! `Node("%%foo")`.
42//!
43//! If you define two structures with the same marker, only the first one will work.
44//!
45use crate::parser::extset::{InlineRootExt, MarkdownThatExt};
46use crate::parser::inline::{InlineRule, InlineState, Text};
47use crate::{MarkdownThat, Node};
48
49#[derive(Debug, Default)]
50struct CodePairCache<const MARKER: char> {
51 scanned: bool,
52 max: Vec<usize>,
53}
54impl<const MARKER: char> InlineRootExt for CodePairCache<MARKER> {}
55
56#[derive(Debug)]
57struct CodePairConfig<const MARKER: char>(fn(usize) -> Node);
58impl<const MARKER: char> MarkdownThatExt for CodePairConfig<MARKER> {}
59
60pub fn add_with<const MARKER: char>(md: &mut MarkdownThat, f: fn(length: usize) -> Node) {
61 md.ext.insert(CodePairConfig::<MARKER>(f));
62
63 md.inline.add_rule::<CodePairScanner<MARKER>>();
64}
65
66#[doc(hidden)]
67pub struct CodePairScanner<const MARKER: char>;
68impl<const MARKER: char> InlineRule for CodePairScanner<MARKER> {
69 const MARKER: char = MARKER;
70
71 fn run(state: &mut InlineState) -> Option<(Node, usize)> {
72 let mut chars = state.src[state.pos..state.pos_max].chars();
73 if chars.next().unwrap() != MARKER {
74 return None;
75 }
76 if state.trailing_text_get().ends_with(MARKER) {
77 return None;
78 }
79
80 let mut pos = state.pos + 1;
81
82 // scan marker length
83 while Some(MARKER) == chars.next() {
84 pos += 1;
85 }
86
87 // backtick length => last seen position
88 let backticks = state
89 .inline_ext
90 .get_or_insert_default::<CodePairCache<MARKER>>();
91 let opener_len = pos - state.pos;
92
93 if backticks.scanned && backticks.max.get(opener_len).copied().unwrap_or(0) <= state.pos {
94 // performance note: adding an entire sequence into pending is 5x faster,
95 // but it will interfere with other rules working on the same char;
96 // and it is extremely rare that a user would put a thousand "`" in text
97 return None;
98 }
99
100 let mut match_start;
101 let mut match_end = pos;
102
103 // Nothing found in the cache, scan until the end of the line (or until the marker is found)
104 while let Some(p) = state.src[match_end..state.pos_max].find(MARKER) {
105 match_start = match_end + p;
106
107 // scan marker length
108 match_end = match_start + 1;
109 chars = state.src[match_end..state.pos_max].chars();
110
111 while Some(MARKER) == chars.next() {
112 match_end += 1;
113 }
114
115 let closer_len = match_end - match_start;
116
117 if closer_len == opener_len {
118 // Found matching closer length.
119 let mut content = state.src[pos..match_start].to_owned().replace('\n', " ");
120 if content.starts_with(' ') && content.ends_with(' ') && content.len() > 2 {
121 content[1..content.len() - 1]
122 .to_owned()
123 .clone_into(&mut content);
124 pos += 1;
125 match_start -= 1;
126 }
127
128 let f = state.md.ext.get::<CodePairConfig<MARKER>>().unwrap().0;
129 let mut node = f(opener_len);
130
131 let mut inner_node = Node::new(Text { content });
132 inner_node.srcmap = state.get_map(pos, match_start);
133 node.children.push(inner_node);
134
135 return Some((node, match_end - state.pos));
136 }
137
138 // Some different length found, put it in cache as upper limit of where closer can be found
139 let backticks = state.inline_ext.get_mut::<CodePairCache<MARKER>>().unwrap();
140 while backticks.max.len() <= closer_len {
141 backticks.max.push(0);
142 }
143 backticks.max[closer_len] = match_start;
144 }
145
146 // Scanned through the end, didn't find anything
147 let backticks = state.inline_ext.get_mut::<CodePairCache<MARKER>>().unwrap();
148 backticks.scanned = true;
149
150 None
151 }
152}