// markdown_syntax/options.rs

//! Parser configuration: which Markdown constructs are recognized and how.
//!
//! [`SyntaxOptions`] is the entry point — pick a preset, optionally tune it with
//! the [`Construct`] builder, then call [`SyntaxOptions::parse`]. [`Constructs`]
//! is the exhaustive per-feature flag set behind it, and [`ParseOptions`] holds
//! the lexing knobs.

use alloc::string::String;
use core::fmt;

/// The full set of syntactic constructs the parser may recognize, one boolean
/// per feature. This is the exhaustive escape hatch; most callers use the
/// [`Constructs::commonmark`]/[`gfm`](Constructs::gfm)/[`mdx`](Constructs::mdx)/
/// [`max`](Constructs::max) presets or the [`Construct`] builder instead of
/// setting fields directly.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Constructs {
    /// Raw HTML blocks, e.g. a `<div>…</div>` block at the top level.
    pub html_block: bool,
    /// Raw inline HTML, e.g. `<span>` within a paragraph.
    pub html_inline: bool,
    /// Indented code blocks (each line indented four spaces or a tab).
    pub indented_code: bool,
    /// GFM pipe tables: a `| a | b |` row over a `|---|---|` delimiter row.
    pub gfm_table: bool,
    /// GFM task list items: `- [ ]` (unchecked) and `- [x]` (checked).
    pub gfm_task_list_item: bool,
    /// GFM strikethrough: `~~text~~`.
    pub gfm_strikethrough: bool,
    /// GFM literal autolinks: a bare `https://…`, `www.…`, or email becomes a
    /// link without angle brackets.
    pub gfm_autolink_literal: bool,
    /// cmark-gfm "relaxed" URL autolinks: bare `scheme://` URLs (and a bare
    /// leading `://`) are auto-linkified without angle brackets, e.g. `smb://`,
    /// `irc://`, `rdar://`. This is a cmark extension beyond the GFM spec (which
    /// defines only `http(s)://`/`www.`/email); on by default in `gfm()` for
    /// GitHub/cmark-gfm parity. The angle form `<scheme:…>` works regardless.
    pub relaxed_autolinks: bool,
    /// GFM alerts: a `> [!NOTE]` (TIP/IMPORTANT/WARNING/CAUTION) blockquote.
    pub gfm_alert: bool,
    /// Underline spans: `__text__`. This overrides CommonMark's `__`-as-strong,
    /// so it is off in the [`max`](Constructs::max) default.
    pub underline: bool,
    /// CriticMarkup-style insertions: `++text++`.
    pub insert: bool,
    /// Highlight / "mark" spans: `==text==`.
    pub highlight: bool,
    /// Subscript: a single-tilde span `~text~` (no spaces).
    pub subscript: bool,
    /// Superscript: `^text^`.
    pub superscript: bool,
    /// Spoiler spans: `||text||`.
    pub spoiler: bool,
    /// Emoji-style shortcodes: `:tada:`.
    pub shortcode: bool,
    /// Description (definition) lists: a term followed by `:`-led details.
    pub description_list: bool,
    /// Footnote definitions: `[^1]: the footnote body`.
    pub footnote_definition: bool,
    /// Footnote references: `[^1]` in running text.
    pub footnote_reference: bool,
    /// Inline footnotes: `^[the note inline]` (also needs `footnote_reference`).
    pub inline_footnote: bool,
    /// Block math: a `$$ … $$` fenced block.
    pub math_block: bool,
    /// Inline math: `$x$` (and the math-code form `` $`x`$ ``).
    pub math_inline: bool,
    /// A leading frontmatter block at the start of the document: `---` YAML or
    /// `+++` TOML.
    pub frontmatter: bool,
    /// Wikilinks with the display title after the pipe: `[[target|title]]`
    /// (the Obsidian convention). Mutually exclusive with the before-pipe order.
    pub wikilink_title_after_pipe: bool,
    /// Wikilinks with the display title before the pipe: `[[title|target]]`.
    /// Mutually exclusive with the after-pipe order.
    pub wikilink_title_before_pipe: bool,
    /// MDX ESM: `import`/`export` statement lines.
    pub mdx_esm: bool,
    /// MDX block-level `{ … }` expressions.
    pub mdx_expression_block: bool,
    /// MDX inline `{ … }` expressions within text.
    pub mdx_expression_inline: bool,
    /// MDX block-level JSX: `<Component/>` as a block. Conflicts with raw HTML.
    pub mdx_jsx_block: bool,
    /// MDX inline JSX: `<Component/>` within text. Conflicts with raw HTML.
    pub mdx_jsx_inline: bool,
    /// Inline directive: `:name[label]{key=val}`. A directive, not MDX.
    pub directive_text: bool,
    /// Leaf directive: `::name[label]{key=val}` on its own line. A directive,
    /// not MDX.
    pub directive_leaf: bool,
    /// Container directive: a `:::name … :::` fenced block. A directive, not MDX.
    pub directive_container: bool,
}

impl Constructs {
    /// The CommonMark baseline: raw HTML and indented code, no extensions.
    ///
    /// Every field is spelled out (no `..` update) so that adding a new
    /// construct forces an explicit decision for this preset.
    #[must_use]
    pub const fn commonmark() -> Self {
        Self {
            html_block: true,
            html_inline: true,
            indented_code: true,
            gfm_table: false,
            gfm_task_list_item: false,
            gfm_strikethrough: false,
            gfm_autolink_literal: false,
            relaxed_autolinks: false,
            gfm_alert: false,
            underline: false,
            insert: false,
            highlight: false,
            subscript: false,
            superscript: false,
            spoiler: false,
            shortcode: false,
            description_list: false,
            footnote_definition: false,
            footnote_reference: false,
            inline_footnote: false,
            math_block: false,
            math_inline: false,
            frontmatter: false,
            wikilink_title_after_pipe: false,
            wikilink_title_before_pipe: false,
            mdx_esm: false,
            mdx_expression_block: false,
            mdx_expression_inline: false,
            mdx_jsx_block: false,
            mdx_jsx_inline: false,
            directive_text: false,
            directive_leaf: false,
            directive_container: false,
        }
    }

    /// GitHub Flavored Markdown: CommonMark plus tables, task lists,
    /// strikethrough, literal autolinks (including the relaxed `scheme://`
    /// form), and footnote definitions and references. GFM alerts and inline
    /// footnotes are left off by this preset.
    #[must_use]
    pub const fn gfm() -> Self {
        let mut constructs = Self::commonmark();
        constructs.gfm_table = true;
        constructs.gfm_task_list_item = true;
        constructs.gfm_strikethrough = true;
        constructs.gfm_autolink_literal = true;
        constructs.relaxed_autolinks = true;
        constructs.footnote_definition = true;
        constructs.footnote_reference = true;
        constructs
    }

    /// MDX: CommonMark with raw HTML and indented code off, and MDX ESM,
    /// expressions, and JSX on.
    #[must_use]
    pub const fn mdx() -> Self {
        let mut constructs = Self::commonmark();
        // JSX and raw HTML conflict, so HTML goes off before JSX is switched on.
        constructs.html_block = false;
        constructs.html_inline = false;
        constructs.indented_code = false;
        constructs.mdx_esm = true;
        constructs.mdx_expression_block = true;
        constructs.mdx_expression_inline = true;
        constructs.mdx_jsx_block = true;
        constructs.mdx_jsx_inline = true;
        constructs
    }

    /// The maximal non-MDX construct set, and the default dialect: every
    /// construct that does not reinterpret a core CommonMark delimiter. MDX is
    /// off (it conflicts with raw HTML and reinterprets `{…}`/`<…>`), and
    /// `underline` is off because it would parse `__bold__` as underline,
    /// overriding CommonMark strong. The wikilink title order is after-pipe.
    #[must_use]
    pub const fn max() -> Self {
        Self {
            html_block: true,
            html_inline: true,
            indented_code: true,
            gfm_table: true,
            gfm_task_list_item: true,
            gfm_strikethrough: true,
            gfm_autolink_literal: true,
            relaxed_autolinks: true,
            gfm_alert: true,
            underline: false,
            insert: true,
            highlight: true,
            subscript: true,
            superscript: true,
            spoiler: true,
            shortcode: true,
            description_list: true,
            footnote_definition: true,
            footnote_reference: true,
            inline_footnote: true,
            math_block: true,
            math_inline: true,
            frontmatter: true,
            wikilink_title_after_pipe: true,
            wikilink_title_before_pipe: false,
            mdx_esm: false,
            mdx_expression_block: false,
            mdx_expression_inline: false,
            mdx_jsx_block: false,
            mdx_jsx_inline: false,
            directive_text: true,
            directive_leaf: true,
            directive_container: true,
        }
    }
}

impl Default for Constructs {
    /// The default construct set is [`Constructs::max`].
    fn default() -> Self {
        Self::max()
    }
}

/// Lexing knobs that tune how existing constructs are read or how source text is
/// preserved, separate from which constructs are recognized ([`Constructs`]).
///
/// The derived [`Default`] leaves every knob off (`false`).
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct ParseOptions {
    /// Treat a single `~text~` as strikethrough (in addition to `~~text~~`).
    /// Inert unless `gfm_strikethrough` is also enabled.
    pub single_tilde_strikethrough: bool,
    /// Keep backslash character escapes (e.g. `\*`) as `Escape` nodes instead of
    /// folding them into text, so the original source can be reproduced.
    pub preserve_character_escapes: bool,
    /// Keep character references (e.g. `&amp;`) as `CharacterReference` nodes
    /// instead of resolving them to their value.
    pub preserve_character_references: bool,
}

229/// A full syntax configuration: which [`Constructs`] are recognized plus the
230/// [`ParseOptions`] lexing knobs. Build one with a preset
231/// ([`commonmark`](SyntaxOptions::commonmark)/[`gfm`](SyntaxOptions::gfm)/
232/// [`mdx`](SyntaxOptions::mdx)/[`default`](SyntaxOptions::default)), optionally
233/// tune it with [`enable`](SyntaxOptions::enable)/[`disable`](SyntaxOptions::disable),
234/// then call [`parse`](SyntaxOptions::parse).
235#[derive(Clone, Debug, Eq, PartialEq)]
236pub struct SyntaxOptions {
237    /// Which syntactic constructs are recognized.
238    pub constructs: Constructs,
239    /// Lexing / source-preservation knobs.
240    pub parse: ParseOptions,
241}
242
243impl SyntaxOptions {
244    /// The strict CommonMark dialect.
245    pub fn commonmark() -> Self {
246        Self {
247            constructs: Constructs::commonmark(),
248            parse: ParseOptions::default(),
249        }
250    }
251
252    /// GitHub Flavored Markdown (also enables single-tilde strikethrough).
253    pub fn gfm() -> Self {
254        Self {
255            constructs: Constructs::gfm(),
256            parse: ParseOptions {
257                single_tilde_strikethrough: true,
258                preserve_character_escapes: false,
259                preserve_character_references: false,
260            },
261        }
262    }
263
264    /// The MDX dialect (JSX, expressions, ESM; no raw HTML).
265    pub fn mdx() -> Self {
266        Self {
267            constructs: Constructs::mdx(),
268            parse: ParseOptions::default(),
269        }
270    }
271
272    /// Enable a [`Construct`] on top of these options, returning the modified
273    /// options for chaining. Grouped constructs (footnotes, math, directives, …)
274    /// flip every flag in the group so no member is left silently inert.
275    pub fn enable(mut self, construct: Construct) -> Self {
276        construct.apply(&mut self.constructs, true);
277        self
278    }
279
280    /// Disable a [`Construct`], the inverse of [`SyntaxOptions::enable`].
281    pub fn disable(mut self, construct: Construct) -> Self {
282        construct.apply(&mut self.constructs, false);
283        self
284    }
285
286    /// Check for contradictory construct combinations (MDX JSX with raw HTML;
287    /// both wikilink title orders). Returns `Ok(())` for every preset; only a
288    /// hand-built config can trip a [`SyntaxConfigError`].
289    pub fn validate(&self) -> Result<(), SyntaxConfigError> {
290        if (self.constructs.mdx_jsx_block || self.constructs.mdx_jsx_inline)
291            && (self.constructs.html_block || self.constructs.html_inline)
292        {
293            return Err(SyntaxConfigError::MdxHtmlConflict);
294        }
295        if self.constructs.wikilink_title_after_pipe && self.constructs.wikilink_title_before_pipe {
296            return Err(SyntaxConfigError::WikilinkTitleOrderConflict);
297        }
298
299        Ok(())
300    }
301}
302
303impl Default for SyntaxOptions {
304    fn default() -> Self {
305        Self {
306            constructs: Constructs::max(),
307            parse: ParseOptions::default(),
308        }
309    }
310}
311
/// Where a wikilink's display title sits relative to the `|` separator. The two
/// orders are mutually exclusive ([`SyntaxConfigError::WikilinkTitleOrderConflict`]).
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum WikiLinkOrder {
    /// `[[target|title]]` — the Obsidian convention, and the maximal default.
    TitleAfterPipe,
    /// `[[title|target]]`.
    TitleBeforePipe,
}

322/// A discoverable, typo-proof front door for toggling a syntax feature via
323/// [`SyntaxOptions::enable`] / [`SyntaxOptions::disable`]. Each variant maps to
324/// one conceptual feature; grouped features flip every underlying [`Constructs`]
325/// flag together. The raw [`Constructs`] struct remains the exhaustive escape
326/// hatch for fine-grained control.
327#[derive(Clone, Copy, Debug, Eq, PartialEq)]
328#[non_exhaustive]
329pub enum Construct {
330    /// GFM pipe tables: `| a | b |` over `|---|---|`.
331    Table,
332    /// GFM task list items: `- [ ]` / `- [x]`.
333    TaskList,
334    /// Strikethrough: `~~text~~`.
335    Strikethrough,
336    /// GFM literal autolinks plus the cmark relaxed `scheme://` extension.
337    Autolink,
338    /// GFM alerts: `> [!NOTE]` callouts.
339    Alert,
340    /// Footnote definitions, references, and inline footnotes.
341    Footnotes,
342    /// Inline and block math.
343    Math,
344    /// A leading `---`/`+++` frontmatter block.
345    Frontmatter,
346    /// Underline: `__text__` (overrides CommonMark strong).
347    Underline,
348    /// Insertions: `++text++`.
349    Insert,
350    /// Highlight / mark: `==text==`.
351    Highlight,
352    /// Subscript: `~text~`.
353    Subscript,
354    /// Superscript: `^text^`.
355    Superscript,
356    /// Spoilers: `||text||`.
357    Spoiler,
358    /// Emoji-style shortcodes: `:tada:`.
359    Shortcode,
360    /// Description / definition lists.
361    DescriptionList,
362    /// Wikilinks `[[…]]` with the given title order.
363    Wikilinks(WikiLinkOrder),
364    /// MDX JSX (block and inline). Conflicts with raw HTML; pair with
365    /// `disable`-ing HTML or start from [`SyntaxOptions::mdx`].
366    MdxJsx,
367    /// MDX `{…}` expressions (block and inline).
368    MdxExpressions,
369    /// MDX ESM `import`/`export` lines.
370    MdxEsm,
371    /// The `:name` / `::name` / `:::name` directive family.
372    Directives,
373}
374
375impl Construct {
376    fn apply(self, c: &mut Constructs, on: bool) {
377        match self {
378            Construct::Table => c.gfm_table = on,
379            Construct::TaskList => c.gfm_task_list_item = on,
380            Construct::Strikethrough => c.gfm_strikethrough = on,
381            Construct::Autolink => {
382                c.gfm_autolink_literal = on;
383                c.relaxed_autolinks = on;
384            }
385            Construct::Alert => c.gfm_alert = on,
386            Construct::Footnotes => {
387                c.footnote_definition = on;
388                c.footnote_reference = on;
389                c.inline_footnote = on;
390            }
391            Construct::Math => {
392                c.math_block = on;
393                c.math_inline = on;
394            }
395            Construct::Frontmatter => c.frontmatter = on,
396            Construct::Underline => c.underline = on,
397            Construct::Insert => c.insert = on,
398            Construct::Highlight => c.highlight = on,
399            Construct::Subscript => c.subscript = on,
400            Construct::Superscript => c.superscript = on,
401            Construct::Spoiler => c.spoiler = on,
402            Construct::Shortcode => c.shortcode = on,
403            Construct::DescriptionList => c.description_list = on,
404            Construct::Wikilinks(order) => {
405                c.wikilink_title_after_pipe = on && matches!(order, WikiLinkOrder::TitleAfterPipe);
406                c.wikilink_title_before_pipe =
407                    on && matches!(order, WikiLinkOrder::TitleBeforePipe);
408            }
409            Construct::MdxJsx => {
410                c.mdx_jsx_block = on;
411                c.mdx_jsx_inline = on;
412            }
413            Construct::MdxExpressions => {
414                c.mdx_expression_block = on;
415                c.mdx_expression_inline = on;
416            }
417            Construct::MdxEsm => c.mdx_esm = on,
418            Construct::Directives => {
419                c.directive_text = on;
420                c.directive_leaf = on;
421                c.directive_container = on;
422            }
423        }
424    }
425}
426
/// A contradictory [`SyntaxOptions`] configuration, reported by
/// [`SyntaxOptions::validate`].
///
/// Implements [`fmt::Display`] with the same text that
/// [`message`](SyntaxConfigError::message) returns.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum SyntaxConfigError {
    /// MDX JSX and raw HTML were both enabled; they both claim `<`.
    MdxHtmlConflict,
    /// Both wikilink title orders (before- and after-pipe) were enabled.
    WikilinkTitleOrderConflict,
}

impl SyntaxConfigError {
    /// The static description shared by `message` and the `Display` impl, so
    /// the two can never drift apart.
    const fn as_str(&self) -> &'static str {
        match self {
            Self::MdxHtmlConflict => "MDX JSX and raw HTML syntax cannot both be enabled",
            Self::WikilinkTitleOrderConflict => {
                "wikilink title-before-pipe and title-after-pipe cannot both be enabled"
            }
        }
    }

    /// A human-readable description of the conflict, as an owned `String`.
    pub fn message(&self) -> String {
        String::from(self.as_str())
    }
}

impl fmt::Display for SyntaxConfigError {
    /// Writes the same text as [`SyntaxConfigError::message`] without
    /// allocating.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(self.as_str())
    }
}