markdown_syntax/options.rs
1//! Parser configuration: which Markdown constructs are recognized and how.
2//!
3//! [`SyntaxOptions`] is the entry point — pick a preset, optionally tune it with
4//! the [`Construct`] builder, then call [`SyntaxOptions::parse`]. [`Constructs`]
5//! is the exhaustive per-feature flag set behind it, and [`ParseOptions`] holds
6//! the lexing knobs.
7
8use alloc::string::String;
9
/// Per-feature switches for every syntactic construct the parser can be told
/// to recognize: one `bool` per construct. Setting fields by hand is the
/// fine-grained escape hatch; the usual route is one of the presets
/// ([`Constructs::commonmark`], [`Constructs::gfm`], [`Constructs::mdx`],
/// [`Constructs::max`]) or the [`Construct`] builder.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Constructs {
    /// Block-level raw HTML, such as a top-level `<div>…</div>`.
    pub html_block: bool,
    /// Inline raw HTML, such as a `<span>` inside a paragraph.
    pub html_inline: bool,
    /// Indented code blocks: every line indented by four spaces or a tab.
    pub indented_code: bool,
    /// GFM pipe tables: a `| a | b |` row above a `|---|---|` delimiter row.
    pub gfm_table: bool,
    /// GFM task list items: `- [ ]` for unchecked, `- [x]` for checked.
    pub gfm_task_list_item: bool,
    /// GFM strikethrough, written `~~text~~`.
    pub gfm_strikethrough: bool,
    /// GFM literal autolinks: a bare `https://…`, `www.…`, or email address is
    /// linked even without angle brackets.
    pub gfm_autolink_literal: bool,
    /// cmark-gfm "relaxed" URL autolinks: any bare `scheme://` URL (and a bare
    /// leading `://`) is linked without angle brackets, e.g. `smb://`,
    /// `irc://`, `rdar://`. A cmark extension that goes beyond the GFM spec
    /// (which covers only `http(s)://`, `www.`, and email); `gfm()` turns it on
    /// for GitHub/cmark-gfm parity. The angle form `<scheme:…>` works either way.
    pub relaxed_autolinks: bool,
    /// GFM alerts: a blockquote opened by `> [!NOTE]` (or TIP, IMPORTANT,
    /// WARNING, CAUTION).
    pub gfm_alert: bool,
    /// Underline spans, written `__text__`. Takes over CommonMark's
    /// `__`-as-strong, which is why [`max`](Constructs::max) leaves it off.
    pub underline: bool,
    /// CriticMarkup-style insertions, written `++text++`.
    pub insert: bool,
    /// Highlight ("mark") spans, written `==text==`.
    pub highlight: bool,
    /// Subscript: a single-tilde span `~text~` containing no spaces.
    pub subscript: bool,
    /// Superscript, written `^text^`.
    pub superscript: bool,
    /// Spoiler spans, written `||text||`.
    pub spoiler: bool,
    /// Emoji-style shortcodes such as `:tada:`.
    pub shortcode: bool,
    /// Description (definition) lists: a term followed by details led by `:`.
    pub description_list: bool,
    /// Footnote definitions, e.g. `[^1]: the footnote body`.
    pub footnote_definition: bool,
    /// Footnote references in running text, e.g. `[^1]`.
    pub footnote_reference: bool,
    /// Inline footnotes, e.g. `^[the note inline]`. Requires
    /// `footnote_reference` to be on as well.
    pub inline_footnote: bool,
    /// Block math: a block fenced by `$$ … $$`.
    pub math_block: bool,
    /// Inline math: `$x$`, plus the math-code form `` $`x`$ ``.
    pub math_inline: bool,
    /// A frontmatter block at the very start of the document, either `---`
    /// (YAML) or `+++` (TOML).
    pub frontmatter: bool,
    /// Wikilinks whose display title follows the pipe: `[[target|title]]`
    /// (the Obsidian convention). Cannot be combined with the before-pipe order.
    pub wikilink_title_after_pipe: bool,
    /// Wikilinks whose display title precedes the pipe: `[[title|target]]`.
    /// Cannot be combined with the after-pipe order.
    pub wikilink_title_before_pipe: bool,
    /// MDX ESM: lines holding `import`/`export` statements.
    pub mdx_esm: bool,
    /// MDX `{ … }` expressions at block level.
    pub mdx_expression_block: bool,
    /// MDX `{ … }` expressions inline within text.
    pub mdx_expression_inline: bool,
    /// MDX JSX at block level, e.g. `<Component/>` as a block. Conflicts with
    /// raw HTML.
    pub mdx_jsx_block: bool,
    /// MDX JSX inline within text, e.g. `<Component/>`. Conflicts with raw HTML.
    pub mdx_jsx_inline: bool,
    /// Inline (text) directive: `:name[label]{key=val}`. A directive, not MDX.
    pub directive_text: bool,
    /// Leaf directive: `::name[label]{key=val}` on a line of its own. A
    /// directive, not MDX.
    pub directive_leaf: bool,
    /// Container directive: a block fenced as `:::name … :::`. A directive,
    /// not MDX.
    pub directive_container: bool,
}

impl Constructs {
    /// The CommonMark baseline: raw HTML (block and inline) and indented code
    /// are on; every extension is off.
    ///
    /// Every field is spelled out so that adding a field to the struct forces
    /// an explicit decision here.
    pub const fn commonmark() -> Self {
        Self {
            // Core constructs.
            html_block: true,
            html_inline: true,
            indented_code: true,
            // GFM extensions.
            gfm_table: false,
            gfm_task_list_item: false,
            gfm_strikethrough: false,
            gfm_autolink_literal: false,
            relaxed_autolinks: false,
            gfm_alert: false,
            // Extra inline spans.
            underline: false,
            insert: false,
            highlight: false,
            subscript: false,
            superscript: false,
            spoiler: false,
            shortcode: false,
            // Lists, footnotes, math, frontmatter.
            description_list: false,
            footnote_definition: false,
            footnote_reference: false,
            inline_footnote: false,
            math_block: false,
            math_inline: false,
            frontmatter: false,
            // Wikilinks.
            wikilink_title_after_pipe: false,
            wikilink_title_before_pipe: false,
            // MDX.
            mdx_esm: false,
            mdx_expression_block: false,
            mdx_expression_inline: false,
            mdx_jsx_block: false,
            mdx_jsx_inline: false,
            // Directives.
            directive_text: false,
            directive_leaf: false,
            directive_container: false,
        }
    }

    /// GitHub Flavored Markdown: the CommonMark baseline plus tables, task
    /// lists, strikethrough, literal autolinks (including the relaxed
    /// `scheme://` form), and footnote definitions/references.
    pub const fn gfm() -> Self {
        Self {
            gfm_table: true,
            gfm_task_list_item: true,
            gfm_strikethrough: true,
            gfm_autolink_literal: true,
            relaxed_autolinks: true,
            footnote_definition: true,
            footnote_reference: true,
            ..Self::commonmark()
        }
    }

    /// MDX: the CommonMark baseline with raw HTML and indented code switched
    /// off, and MDX ESM, expressions, and JSX switched on.
    pub const fn mdx() -> Self {
        Self {
            html_block: false,
            html_inline: false,
            indented_code: false,
            mdx_esm: true,
            mdx_expression_block: true,
            mdx_expression_inline: true,
            mdx_jsx_block: true,
            mdx_jsx_inline: true,
            ..Self::commonmark()
        }
    }

    /// The largest non-MDX construct set, which is also the default dialect:
    /// every construct that does not reinterpret a core CommonMark delimiter.
    /// MDX stays off (it conflicts with raw HTML and reinterprets
    /// `{…}`/`<…>`), and `underline` stays off because it would read
    /// `__bold__` as underline instead of CommonMark strong. Wikilinks use the
    /// title-after-pipe order.
    pub const fn max() -> Self {
        Self {
            // Deliberately left off.
            underline: false,
            wikilink_title_before_pipe: false,
            mdx_esm: false,
            mdx_expression_block: false,
            mdx_expression_inline: false,
            mdx_jsx_block: false,
            mdx_jsx_inline: false,
            // Everything else is on.
            html_block: true,
            html_inline: true,
            indented_code: true,
            gfm_table: true,
            gfm_task_list_item: true,
            gfm_strikethrough: true,
            gfm_autolink_literal: true,
            relaxed_autolinks: true,
            gfm_alert: true,
            insert: true,
            highlight: true,
            subscript: true,
            superscript: true,
            spoiler: true,
            shortcode: true,
            description_list: true,
            footnote_definition: true,
            footnote_reference: true,
            inline_footnote: true,
            math_block: true,
            math_inline: true,
            frontmatter: true,
            wikilink_title_after_pipe: true,
            directive_text: true,
            directive_leaf: true,
            directive_container: true,
        }
    }
}

/// The default construct set is the maximal one, [`Constructs::max`].
impl Default for Constructs {
    fn default() -> Self {
        Constructs::max()
    }
}
213
/// Lexing knobs: they adjust how already-recognized constructs are read, or how
/// much of the source text is preserved, and are kept apart from the choice of
/// which constructs are recognized (see [`Constructs`]). The derived `Default`
/// leaves every knob off.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct ParseOptions {
    /// Also accept a single `~text~` as strikethrough, alongside `~~text~~`.
    /// Has no effect unless `gfm_strikethrough` is enabled too.
    pub single_tilde_strikethrough: bool,
    /// Emit backslash character escapes (e.g. `\*`) as `Escape` nodes rather
    /// than folding them into text, so the original source can be reproduced.
    pub preserve_character_escapes: bool,
    /// Emit character references (e.g. `&amp;`) as `CharacterReference` nodes
    /// rather than resolving them to their value.
    pub preserve_character_references: bool,
}
228
229/// A full syntax configuration: which [`Constructs`] are recognized plus the
230/// [`ParseOptions`] lexing knobs. Build one with a preset
231/// ([`commonmark`](SyntaxOptions::commonmark)/[`gfm`](SyntaxOptions::gfm)/
232/// [`mdx`](SyntaxOptions::mdx)/[`default`](SyntaxOptions::default)), optionally
233/// tune it with [`enable`](SyntaxOptions::enable)/[`disable`](SyntaxOptions::disable),
234/// then call [`parse`](SyntaxOptions::parse).
235#[derive(Clone, Debug, Eq, PartialEq)]
236pub struct SyntaxOptions {
237 /// Which syntactic constructs are recognized.
238 pub constructs: Constructs,
239 /// Lexing / source-preservation knobs.
240 pub parse: ParseOptions,
241}
242
243impl SyntaxOptions {
244 /// The strict CommonMark dialect.
245 pub fn commonmark() -> Self {
246 Self {
247 constructs: Constructs::commonmark(),
248 parse: ParseOptions::default(),
249 }
250 }
251
252 /// GitHub Flavored Markdown (also enables single-tilde strikethrough).
253 pub fn gfm() -> Self {
254 Self {
255 constructs: Constructs::gfm(),
256 parse: ParseOptions {
257 single_tilde_strikethrough: true,
258 preserve_character_escapes: false,
259 preserve_character_references: false,
260 },
261 }
262 }
263
264 /// The MDX dialect (JSX, expressions, ESM; no raw HTML).
265 pub fn mdx() -> Self {
266 Self {
267 constructs: Constructs::mdx(),
268 parse: ParseOptions::default(),
269 }
270 }
271
272 /// Enable a [`Construct`] on top of these options, returning the modified
273 /// options for chaining. Grouped constructs (footnotes, math, directives, …)
274 /// flip every flag in the group so no member is left silently inert.
275 pub fn enable(mut self, construct: Construct) -> Self {
276 construct.apply(&mut self.constructs, true);
277 self
278 }
279
280 /// Disable a [`Construct`], the inverse of [`SyntaxOptions::enable`].
281 pub fn disable(mut self, construct: Construct) -> Self {
282 construct.apply(&mut self.constructs, false);
283 self
284 }
285
286 /// Check for contradictory construct combinations (MDX JSX with raw HTML;
287 /// both wikilink title orders). Returns `Ok(())` for every preset; only a
288 /// hand-built config can trip a [`SyntaxConfigError`].
289 pub fn validate(&self) -> Result<(), SyntaxConfigError> {
290 if (self.constructs.mdx_jsx_block || self.constructs.mdx_jsx_inline)
291 && (self.constructs.html_block || self.constructs.html_inline)
292 {
293 return Err(SyntaxConfigError::MdxHtmlConflict);
294 }
295 if self.constructs.wikilink_title_after_pipe && self.constructs.wikilink_title_before_pipe {
296 return Err(SyntaxConfigError::WikilinkTitleOrderConflict);
297 }
298
299 Ok(())
300 }
301}
302
303impl Default for SyntaxOptions {
304 fn default() -> Self {
305 Self {
306 constructs: Constructs::max(),
307 parse: ParseOptions::default(),
308 }
309 }
310}
311
/// Which side of the `|` separator holds a wikilink's display title. Only one
/// order can be active at a time; enabling both is reported as
/// [`SyntaxConfigError::WikilinkTitleOrderConflict`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum WikiLinkOrder {
    /// `[[target|title]]`: the Obsidian convention, and the order used by the
    /// maximal default.
    TitleAfterPipe,
    /// `[[title|target]]`.
    TitleBeforePipe,
}
321
322/// A discoverable, typo-proof front door for toggling a syntax feature via
323/// [`SyntaxOptions::enable`] / [`SyntaxOptions::disable`]. Each variant maps to
324/// one conceptual feature; grouped features flip every underlying [`Constructs`]
325/// flag together. The raw [`Constructs`] struct remains the exhaustive escape
326/// hatch for fine-grained control.
327#[derive(Clone, Copy, Debug, Eq, PartialEq)]
328#[non_exhaustive]
329pub enum Construct {
330 /// GFM pipe tables: `| a | b |` over `|---|---|`.
331 Table,
332 /// GFM task list items: `- [ ]` / `- [x]`.
333 TaskList,
334 /// Strikethrough: `~~text~~`.
335 Strikethrough,
336 /// GFM literal autolinks plus the cmark relaxed `scheme://` extension.
337 Autolink,
338 /// GFM alerts: `> [!NOTE]` callouts.
339 Alert,
340 /// Footnote definitions, references, and inline footnotes.
341 Footnotes,
342 /// Inline and block math.
343 Math,
344 /// A leading `---`/`+++` frontmatter block.
345 Frontmatter,
346 /// Underline: `__text__` (overrides CommonMark strong).
347 Underline,
348 /// Insertions: `++text++`.
349 Insert,
350 /// Highlight / mark: `==text==`.
351 Highlight,
352 /// Subscript: `~text~`.
353 Subscript,
354 /// Superscript: `^text^`.
355 Superscript,
356 /// Spoilers: `||text||`.
357 Spoiler,
358 /// Emoji-style shortcodes: `:tada:`.
359 Shortcode,
360 /// Description / definition lists.
361 DescriptionList,
362 /// Wikilinks `[[…]]` with the given title order.
363 Wikilinks(WikiLinkOrder),
364 /// MDX JSX (block and inline). Conflicts with raw HTML; pair with
365 /// `disable`-ing HTML or start from [`SyntaxOptions::mdx`].
366 MdxJsx,
367 /// MDX `{…}` expressions (block and inline).
368 MdxExpressions,
369 /// MDX ESM `import`/`export` lines.
370 MdxEsm,
371 /// The `:name` / `::name` / `:::name` directive family.
372 Directives,
373}
374
375impl Construct {
376 fn apply(self, c: &mut Constructs, on: bool) {
377 match self {
378 Construct::Table => c.gfm_table = on,
379 Construct::TaskList => c.gfm_task_list_item = on,
380 Construct::Strikethrough => c.gfm_strikethrough = on,
381 Construct::Autolink => {
382 c.gfm_autolink_literal = on;
383 c.relaxed_autolinks = on;
384 }
385 Construct::Alert => c.gfm_alert = on,
386 Construct::Footnotes => {
387 c.footnote_definition = on;
388 c.footnote_reference = on;
389 c.inline_footnote = on;
390 }
391 Construct::Math => {
392 c.math_block = on;
393 c.math_inline = on;
394 }
395 Construct::Frontmatter => c.frontmatter = on,
396 Construct::Underline => c.underline = on,
397 Construct::Insert => c.insert = on,
398 Construct::Highlight => c.highlight = on,
399 Construct::Subscript => c.subscript = on,
400 Construct::Superscript => c.superscript = on,
401 Construct::Spoiler => c.spoiler = on,
402 Construct::Shortcode => c.shortcode = on,
403 Construct::DescriptionList => c.description_list = on,
404 Construct::Wikilinks(order) => {
405 c.wikilink_title_after_pipe = on && matches!(order, WikiLinkOrder::TitleAfterPipe);
406 c.wikilink_title_before_pipe =
407 on && matches!(order, WikiLinkOrder::TitleBeforePipe);
408 }
409 Construct::MdxJsx => {
410 c.mdx_jsx_block = on;
411 c.mdx_jsx_inline = on;
412 }
413 Construct::MdxExpressions => {
414 c.mdx_expression_block = on;
415 c.mdx_expression_inline = on;
416 }
417 Construct::MdxEsm => c.mdx_esm = on,
418 Construct::Directives => {
419 c.directive_text = on;
420 c.directive_leaf = on;
421 c.directive_container = on;
422 }
423 }
424 }
425}
426
/// A contradictory [`SyntaxOptions`] configuration, reported by
/// [`SyntaxOptions::validate`].
///
/// The error implements [`core::fmt::Display`], so it can be formatted with
/// `{}` without going through the allocating [`SyntaxConfigError::message`].
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum SyntaxConfigError {
    /// MDX JSX and raw HTML were both enabled; they both claim `<`.
    MdxHtmlConflict,
    /// Both wikilink title orders (before- and after-pipe) were enabled.
    WikilinkTitleOrderConflict,
}

impl SyntaxConfigError {
    /// The static text shared by [`SyntaxConfigError::message`] and the
    /// `Display` impl, so the two can never drift apart.
    const fn description(&self) -> &'static str {
        match self {
            Self::MdxHtmlConflict => "MDX JSX and raw HTML syntax cannot both be enabled",
            Self::WikilinkTitleOrderConflict => {
                "wikilink title-before-pipe and title-after-pipe cannot both be enabled"
            }
        }
    }

    /// A human-readable description of the conflict, as an owned `String`.
    pub fn message(&self) -> String {
        String::from(self.description())
    }
}

/// Formats the same text that [`SyntaxConfigError::message`] returns.
impl core::fmt::Display for SyntaxConfigError {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        f.write_str(self.description())
    }
}