Skip to main content

carta_core/
extensions.rs

//! Format extensions: the set of optional syntax features a reader or writer may honor.
//!
//! [`Extension`] is one named feature; [`Extensions`] is a deterministic, allocation-free set of them
//! backed by a fixed array of 64-bit words. The array length is derived from the variant count, so
//! the set carries no 128-variant ceiling and scales to the full extension set. [`presets`] holds
//! the per-flavor sets; strict `CommonMark` is the empty set.
6
/// Declares the [`Extension`] enum from a `Variant => "identifier"` list and derives the
/// `ALL`/`COUNT`/`name`/`from_name` metadata from that same list, so the variant list remains the
/// single source of truth for the bitset sizing in [`Extensions`].
macro_rules! define_extensions {
    ($($(#[$docs:meta])* $case:ident => $label:literal),+ $(,)?) => {
        /// A single format extension. Each variant's position in [`Extension::ALL`] is its bit
        /// index in [`Extensions`].
        #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
        #[non_exhaustive]
        pub enum Extension {
            $(
                $(#[$docs])*
                $case,
            )+
        }

        impl Extension {
            /// All extensions, listed in the order they are declared.
            pub const ALL: &'static [Self] = &[$(Self::$case),+];
            /// How many distinct extensions exist.
            pub const COUNT: usize = Self::ALL.len();

            /// The identifier this extension is known by (e.g. `"footnotes"`).
            #[must_use]
            pub const fn name(self) -> &'static str {
                match self {
                    $(Self::$case => $label,)+
                }
            }

            /// Looks up the extension whose identifier is `name`; `None` when no extension uses
            /// that identifier.
            #[must_use]
            pub fn from_name(name: &str) -> Option<Self> {
                match name {
                    $($label => Some(Self::$case),)+
                    _ => None,
                }
            }
        }
    };
}
37
// The extension catalogue. Each entry is `Variant => "identifier"`; the identifier is the string
// `Extension::name` returns and `Extension::from_name` accepts. Declaration order is significant:
// it fixes each variant's position in `Extension::ALL`, and with it the variant's bit index in
// `Extensions` and the order in which `Extensions::iter` yields members.
define_extensions! {
    /// Straight quotes, `...`, `--`, and `---` become curly quotes, an ellipsis, and en/em dashes.
    Smart => "smart",
    /// `~~text~~` strikeout spans.
    Strikeout => "strikeout",
    /// `^text^` superscript spans.
    Superscript => "superscript",
    /// `~text~` subscript spans.
    Subscript => "subscript",
    /// Pipe tables: `|`-separated cells with a delimiter row carrying the column alignments.
    PipeTables => "pipe_tables",
    /// `[^label]` footnote references with separately defined note bodies.
    Footnotes => "footnotes",
    /// `- [ ]` / `- [x]` task-list items.
    TaskLists => "task_lists",
    /// A bare absolute URI or `www.` address in running text becomes a link.
    Autolink => "autolink_bare_uris",
    /// `$…$` inline and `$$…$$` display math.
    TexMathDollars => "tex_math_dollars",
    /// `:::`-fenced divs carrying an attribute block or a bare class name.
    FencedDivs => "fenced_divs",
    /// `[text]{.class}` spans: bracketed text followed by an attribute block.
    BracketedSpans => "bracketed_spans",
    /// Every newline within a paragraph is a hard line break.
    HardLineBreaks => "hard_line_breaks",
    /// Raw HTML tags and blocks are carried through rather than treated as text.
    RawHtml => "raw_html",
    /// A `{#id .class key=val}` attribute block on a header line.
    HeaderAttributes => "header_attributes",
    /// An attribute block on a fenced code block's opening line.
    FencedCodeAttributes => "fenced_code_attributes",
    /// An attribute block after an inline code span.
    InlineCodeAttributes => "inline_code_attributes",
    /// An attribute block after a link or image.
    LinkAttributes => "link_attributes",
    /// The combined attribute toggle: the attribute syntaxes enabled as a group.
    Attributes => "attributes",
    /// Definition lists: a term line followed by `:`-marked definition blocks.
    DefinitionLists => "definition_lists",
    /// Grid tables drawn with `+---+` cell borders.
    GridTables => "grid_tables",
    /// Multiline tables, whose cells may continue across several source lines.
    MultilineTables => "multiline_tables",
    /// Simple tables: columns aligned under a dashed header line.
    SimpleTables => "simple_tables",
    /// A `Table:` (or bare `:`) caption line attached to a table.
    TableCaptions => "table_captions",
    /// `|`-prefixed line blocks, preserving the source's line divisions.
    LineBlocks => "line_blocks",
    /// Ordered-list markers beyond decimal numbers: letters, roman numerals, and `)` delimiters.
    FancyLists => "fancy_lists",
    /// `(@label)` example lists, numbered sequentially across the whole document.
    ExampleLists => "example_lists",
    /// An ordered list starts at the number its first marker carries rather than 1.
    Startnum => "startnum",
    /// A `---`-delimited YAML metadata block.
    YamlMetadataBlock => "yaml_metadata_block",
    /// A `%`-prefixed title/author/date block at the top of the document.
    PandocTitleBlock => "pandoc_title_block",
    /// A header without an explicit identifier gets one derived from its text.
    AutoIdentifiers => "auto_identifiers",
    /// Derived header identifiers use the `GitHub` slug form: lowercased, punctuation dropped,
    /// spaces to hyphens.
    GfmAutoIdentifiers => "gfm_auto_identifiers",
    /// Fold a derived identifier down to ASCII, dropping diacritics before the slug is formed.
    AsciiIdentifiers => "ascii_identifiers",
    /// A header's explicit identifier is written in `MultiMarkdown`'s trailing `[id]` form rather
    /// than the `{#id}` attribute block.
    MmdHeaderIdentifiers => "mmd_header_identifiers",
    /// A header's own text works as a reference-link label for that header.
    ImplicitHeaderReferences => "implicit_header_references",
    /// A bare image with a caption becomes a figure.
    ImplicitFigures => "implicit_figures",
    /// Raw passthrough: `` `code`{=fmt} `` inline and ```` ```{=fmt} ```` fenced blocks.
    RawAttribute => "raw_attribute",
    /// A `^[…]` inline note expands to a footnote in place.
    InlineNotes => "inline_notes",
    /// A block-level `<div>` becomes a `Div`, with Markdown parsed inside.
    NativeDivs => "native_divs",
    /// An inline `<span>` becomes a `Span`, with Markdown parsed inside.
    NativeSpans => "native_spans",
    /// Markdown is parsed inside block-level HTML, which is otherwise split tag-by-tag.
    MarkdownInHtmlBlocks => "markdown_in_html_blocks",
    /// A `<div>`/`<span>` emitted for a div/span carries a `data-markdown="1"` marker so its
    /// contents are still parsed as Markdown; this also forces a div with no native syntax into an
    /// HTML wrap.
    MarkdownAttribute => "markdown_attribute",
    /// Inline raw `TeX` (`\command{…}`, `\begin{env}…\end{env}`) passes through verbatim.
    RawTex => "raw_tex",
    /// `[@key]` / `@key` citation references.
    Citations => "citations",
    /// An attribute block on a table's caption line attaches to the table.
    TableAttributes => "table_attributes",
    /// A blank line is required before a blockquote, so one never interrupts a paragraph.
    BlankBeforeBlockquote => "blank_before_blockquote",
    /// A blank line is required before a header, so one never interrupts a paragraph.
    BlankBeforeHeader => "blank_before_header",
    /// `==text==` highlight spans.
    Mark => "mark",
    /// `:name:` emoji shortcodes.
    Emoji => "emoji",
    /// `> [!NOTE]`-style admonition blockquotes become classed divs.
    Alerts => "alerts",
    /// `\(…\)` inline and `\[…\]` display math delimiters.
    TexMathSingleBackslash => "tex_math_single_backslash",
    /// `\\(…\\)` inline and `\\[…\\]` display math delimiters.
    TexMathDoubleBackslash => "tex_math_double_backslash",
    /// Tilde-fenced (`~~~`) code blocks; with no fence form available, code is written in the
    /// four-space indented form.
    FencedCodeBlocks => "fenced_code_blocks",
    /// Backtick-fenced code blocks.
    BacktickCodeBlocks => "backtick_code_blocks",
    /// The `GitHub` math surface: inline `` $`…`$ `` and a ```` ```math ```` display block, as
    /// opposed to the `$…$`/`$$…$$` dollar form.
    TexMathGfm => "tex_math_gfm",
    /// A backslash at a line's end is a hard line break, written as a trailing `\`; without it the
    /// writer falls back to two trailing spaces.
    EscapedLineBreaks => "escaped_line_breaks",
    /// An underscore inside a word opens no emphasis, so the writer leaves intra-word `_` literal;
    /// without it every `_` is escaped so a strict reader cannot start emphasis mid-word.
    IntrawordUnderscores => "intraword_underscores",
    /// A list may begin directly after a paragraph line with no intervening blank line,
    /// interrupting it; without it a list marker on the line after a paragraph folds into that
    /// paragraph.
    ListsWithoutPrecedingBlankline => "lists_without_preceding_blankline",
    /// `*[SHY]: Soft hyphen` abbreviation definitions, applied to later occurrences of the term.
    Abbreviations => "abbreviations",
    /// A backslash escapes any symbol, not only the ASCII-punctuation subset.
    AllSymbolsEscapable => "all_symbols_escapable",
    /// A backslash before `<` or `>` escapes the angle bracket.
    AngleBracketsEscapable => "angle_brackets_escapable",
    /// Line breaks between East Asian wide characters carry no width and are dropped.
    EastAsianLineBreaks => "east_asian_line_breaks",
    /// An indented code block requires four spaces of indentation rather than one tab stop.
    FourSpaceRule => "four_space_rule",
    /// Typographic conventions of the Project Gutenberg style for plain-text output.
    Gutenberg => "gutenberg",
    /// Soft line breaks within a paragraph are discarded rather than kept as spaces.
    IgnoreLineBreaks => "ignore_line_breaks",
    /// User-defined `LaTeX` macros are expanded in math and raw `TeX`.
    LatexMacros => "latex_macros",
    /// Bird-track (`> `) literate-program code sections.
    LiterateHaskell => "literate_haskell",
    /// An attribute block following a link or image in the `MultiMarkdown` position.
    MmdLinkAttributes => "mmd_link_attributes",
    /// A `MultiMarkdown` metadata block at the top of the document.
    MmdTitleBlock => "mmd_title_block",
    /// `-` and `--` map to en/em dashes under the older dash convention.
    OldDashes => "old_dashes",
    /// A raw block or inline may be written directly as Markdown for round-tripping.
    RawMarkdown => "raw_markdown",
    /// Relative paths in links and images are rebased onto the source file's location.
    RebaseRelativePaths => "rebase_relative_paths",
    /// `~x` / `^x` subscript and superscript bind only the single following character.
    ShortSubsuperscripts => "short_subsuperscripts",
    /// A defined label may be referenced by `[label]` alone, with no following `[]` or `(…)`.
    ShortcutReferenceLinks => "shortcut_reference_links",
    /// An ATX header requires a space between the opening `#` run and the heading text.
    SpaceInAtxHeader => "space_in_atx_header",
    /// A reference link's label and its following `[id]` may be separated by whitespace.
    SpacedReferenceLinks => "spaced_reference_links",
    /// `[[target|title]]` wiki links, with the title following the pipe.
    WikilinksTitleAfterPipe => "wikilinks_title_after_pipe",
    /// `[[title|target]]` wiki links, with the title preceding the pipe.
    WikilinksTitleBeforePipe => "wikilinks_title_before_pipe",
}
204
205const WORD_BITS: usize = u64::BITS as usize;
206const WORDS: usize = Extension::COUNT.div_ceil(WORD_BITS);
207
208// The bitset indexing in `from_list` is sound only while each variant's discriminant equals its
209// position in `ALL` (so every `ext as usize` lands in `0..COUNT`). The macro emits no explicit
210// discriminants, so this holds — asserted at compile time here, turning a future edit that breaks
211// contiguity into a build failure rather than an out-of-bounds index.
212#[allow(clippy::indexing_slicing)]
213const _: () = {
214    let mut i = 0;
215    while i < Extension::ALL.len() {
216        assert!(Extension::ALL[i] as usize == i);
217        i += 1;
218    }
219};
220
221/// A deterministic, allocation-free set of [`Extension`]s, backed by a fixed array of 64-bit words
222/// indexed by each variant's position in [`Extension::ALL`].
223#[derive(Clone, Copy, PartialEq, Eq)]
224pub struct Extensions([u64; WORDS]);
225
226impl Default for Extensions {
227    fn default() -> Self {
228        Self::empty()
229    }
230}
231
232impl Extensions {
233    /// The empty set (strict `CommonMark`).
234    #[must_use]
235    pub const fn empty() -> Self {
236        Self([0; WORDS])
237    }
238
239    /// The set containing exactly `list`. Const so presets are `const` values.
240    #[must_use]
241    // Const indexing: contiguity (asserted above) gives `bit < COUNT`, so `bit / WORD_BITS < WORDS`;
242    // `i < list.len()`. Both indices are in bounds, and slice `get` is not usable across all const
243    // contexts on the pinned toolchain.
244    #[allow(clippy::indexing_slicing)]
245    pub const fn from_list(list: &[Extension]) -> Self {
246        let mut words = [0u64; WORDS];
247        let mut i = 0;
248        while i < list.len() {
249            let bit = list[i] as usize;
250            words[bit / WORD_BITS] |= 1u64 << (bit % WORD_BITS);
251            i += 1;
252        }
253        Self(words)
254    }
255
256    /// Whether `ext` is in the set.
257    #[must_use]
258    pub fn contains(self, ext: Extension) -> bool {
259        let bit = ext as usize;
260        self.0
261            .get(bit / WORD_BITS)
262            .is_some_and(|word| (word >> (bit % WORD_BITS)) & 1 == 1)
263    }
264
265    /// Adds `ext` to the set.
266    pub fn insert(&mut self, ext: Extension) {
267        let bit = ext as usize;
268        if let Some(word) = self.0.get_mut(bit / WORD_BITS) {
269            *word |= 1u64 << (bit % WORD_BITS);
270        }
271    }
272
273    /// Removes `ext` from the set.
274    pub fn remove(&mut self, ext: Extension) {
275        let bit = ext as usize;
276        if let Some(word) = self.0.get_mut(bit / WORD_BITS) {
277            *word &= !(1u64 << (bit % WORD_BITS));
278        }
279    }
280
281    /// The union of this set and `other`.
282    #[must_use]
283    pub fn union(self, other: Extensions) -> Extensions {
284        let mut words = self.0;
285        for (word, &add) in words.iter_mut().zip(other.0.iter()) {
286            *word |= add;
287        }
288        Extensions(words)
289    }
290
291    /// Whether the set is empty.
292    #[must_use]
293    pub fn is_empty(self) -> bool {
294        self.0.iter().all(|&word| word == 0)
295    }
296
297    /// The set's extensions in [`Extension::ALL`] (deterministic) order.
298    pub fn iter(self) -> impl Iterator<Item = Extension> {
299        Extension::ALL
300            .iter()
301            .copied()
302            .filter(move |&ext| self.contains(ext))
303    }
304}
305
306impl core::fmt::Debug for Extensions {
307    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
308        f.debug_set()
309            .entries(self.iter().map(Extension::name))
310            .finish()
311    }
312}
313
314/// Per-flavor extension sets.
315pub mod presets {
316    use super::{Extension, Extensions};
317
318    /// Strict `CommonMark`: no extensions.
319    pub const COMMONMARK: Extensions = Extensions::empty();
320
321    /// `GitHub`-Flavored Markdown.
322    pub const GFM: Extensions = Extensions::from_list(&[
323        Extension::Strikeout,
324        Extension::PipeTables,
325        Extension::BacktickCodeBlocks,
326        Extension::TaskLists,
327        Extension::Autolink,
328        Extension::Footnotes,
329        Extension::TexMathDollars,
330        Extension::TexMathGfm,
331        Extension::GfmAutoIdentifiers,
332        Extension::RawHtml,
333        Extension::Emoji,
334        Extension::Alerts,
335    ]);
336
337    /// The `commonmark_x` dialect: `CommonMark` with a broad set of inline and block extensions
338    /// enabled. `backtick_code_blocks` is additionally carried because the shared Markdown engine
339    /// fences code on that flag, which `CommonMark` does natively.
340    pub const COMMONMARK_X: Extensions = Extensions::from_list(&[
341        Extension::Smart,
342        Extension::Strikeout,
343        Extension::Superscript,
344        Extension::Subscript,
345        Extension::PipeTables,
346        Extension::Footnotes,
347        Extension::TaskLists,
348        Extension::TexMathDollars,
349        Extension::FencedDivs,
350        Extension::BracketedSpans,
351        Extension::BacktickCodeBlocks,
352        Extension::RawHtml,
353        Extension::RawAttribute,
354        Extension::Attributes,
355        Extension::HeaderAttributes,
356        Extension::FencedCodeAttributes,
357        Extension::InlineCodeAttributes,
358        Extension::LinkAttributes,
359        Extension::DefinitionLists,
360        Extension::FancyLists,
361        Extension::GfmAutoIdentifiers,
362        Extension::ImplicitHeaderReferences,
363        Extension::Emoji,
364        Extension::Alerts,
365    ]);
366
367    /// The extended Markdown dialect: the broad default extension set.
368    pub const MARKDOWN: Extensions = Extensions::from_list(&[
369        Extension::AllSymbolsEscapable,
370        Extension::Smart,
371        Extension::Strikeout,
372        Extension::Superscript,
373        Extension::Subscript,
374        Extension::PipeTables,
375        Extension::Footnotes,
376        Extension::TaskLists,
377        Extension::TexMathDollars,
378        Extension::FencedDivs,
379        Extension::BracketedSpans,
380        Extension::RawHtml,
381        Extension::HeaderAttributes,
382        Extension::FencedCodeAttributes,
383        Extension::FencedCodeBlocks,
384        Extension::BacktickCodeBlocks,
385        Extension::InlineCodeAttributes,
386        Extension::LinkAttributes,
387        Extension::DefinitionLists,
388        Extension::GridTables,
389        Extension::MultilineTables,
390        Extension::SimpleTables,
391        Extension::TableCaptions,
392        Extension::LineBlocks,
393        Extension::FancyLists,
394        Extension::ExampleLists,
395        Extension::Startnum,
396        Extension::YamlMetadataBlock,
397        Extension::PandocTitleBlock,
398        Extension::AutoIdentifiers,
399        Extension::ImplicitHeaderReferences,
400        Extension::ImplicitFigures,
401        Extension::RawAttribute,
402        Extension::InlineNotes,
403        Extension::NativeDivs,
404        Extension::NativeSpans,
405        Extension::MarkdownInHtmlBlocks,
406        Extension::RawTex,
407        Extension::Citations,
408        Extension::TableAttributes,
409        Extension::BlankBeforeBlockquote,
410        Extension::BlankBeforeHeader,
411        Extension::EscapedLineBreaks,
412        Extension::IntrawordUnderscores,
413        Extension::SpaceInAtxHeader,
414    ]);
415
416    /// The legacy GitHub Markdown dialect (`markdown_github`). The set is restricted to the
417    /// variants that exist and affect writer output: backtick-fenced code, pipe tables, strikeout,
418    /// task lists, footnotes, autolinking, emoji, and alerts, but no smart typography, math, spans,
419    /// or fenced divs.
420    pub const MARKDOWN_GITHUB: Extensions = Extensions::from_list(&[
421        Extension::Strikeout,
422        Extension::PipeTables,
423        Extension::Footnotes,
424        Extension::TaskLists,
425        Extension::Autolink,
426        Extension::RawHtml,
427        Extension::FencedCodeBlocks,
428        Extension::BacktickCodeBlocks,
429        Extension::AutoIdentifiers,
430        Extension::GfmAutoIdentifiers,
431        Extension::Emoji,
432        Extension::Alerts,
433        Extension::IntrawordUnderscores,
434    ]);
435
436    /// The PHP Markdown Extra dialect (`markdown_phpextra`). The set is restricted to the variants
437    /// that exist and affect writer output: definition lists, fenced (tilde) code blocks, footnotes,
438    /// header and link attributes, pipe tables, and raw HTML. It has no backtick code fences, so code
439    /// fences are written with tildes, and no smart typography, math, strikeout, spans, or fenced divs.
440    pub const MARKDOWN_PHPEXTRA: Extensions = Extensions::from_list(&[
441        Extension::DefinitionLists,
442        Extension::FencedCodeBlocks,
443        Extension::Footnotes,
444        Extension::HeaderAttributes,
445        Extension::IntrawordUnderscores,
446        Extension::LinkAttributes,
447        Extension::MarkdownAttribute,
448        Extension::PipeTables,
449        Extension::RawHtml,
450    ]);
451
452    /// The `MultiMarkdown` dialect (`markdown_mmd`). The set is restricted to the variants that
453    /// exist and affect writer output: backtick-fenced code, definition lists, footnotes, pipe
454    /// tables, implicit figures and header references, sub/superscript, dollar math, raw HTML and raw
455    /// attributes, auto identifiers, `MultiMarkdown`'s trailing `[id]` header identifiers, and the
456    /// `data-markdown`
457    /// div marker. It has no header attribute blocks, strikeout, task lists, smart typography, spans,
458    /// or fenced divs. With `tex_math_dollars` on and taking precedence, a `tex_math_double_backslash`
459    /// surface would not change this dialect's writer output, so it is left out of the preset and math
460    /// is emitted as `$…$`.
461    pub const MARKDOWN_MMD: Extensions = Extensions::from_list(&[
462        Extension::AutoIdentifiers,
463        Extension::BacktickCodeBlocks,
464        Extension::DefinitionLists,
465        Extension::Footnotes,
466        Extension::ImplicitFigures,
467        Extension::ImplicitHeaderReferences,
468        Extension::IntrawordUnderscores,
469        Extension::MarkdownAttribute,
470        Extension::MmdHeaderIdentifiers,
471        Extension::PipeTables,
472        Extension::RawAttribute,
473        Extension::RawHtml,
474        Extension::Subscript,
475        Extension::Superscript,
476        Extension::TexMathDollars,
477    ]);
478
479    /// The original Markdown dialect (`markdown_strict`). The set is restricted to the variants that
480    /// exist and affect writer output — only raw HTML. With no fenced or backtick code, tables,
481    /// definition lists,
482    /// footnotes, task lists, math, or any attribute syntax, every richer construct falls back to
483    /// indented code, an HTML block, or a raw glyph. Lacking `intraword_underscores`, every `_` is
484    /// escaped; lacking `pipe_tables`, a literal `|` is left unescaped.
485    pub const MARKDOWN_STRICT: Extensions = Extensions::from_list(&[Extension::RawHtml]);
486
487    // The reader default sets below are broader than the writer presets above: a reader enables every
488    // construct the dialect can parse, whereas the writer presets carry only the extensions that shape
489    // the emitted text. Some entries name constructs the shared Markdown engine does not yet branch on;
490    // they are recorded so the dialect's default surface is complete and takes effect once modeled.
491
492    /// Reader defaults for the original Markdown dialect (`markdown_strict`): only raw HTML, plus the
493    /// shortcut and spaced reference-link forms.
494    pub const MARKDOWN_STRICT_READ: Extensions = Extensions::from_list(&[
495        Extension::RawHtml,
496        Extension::ShortcutReferenceLinks,
497        Extension::SpacedReferenceLinks,
498    ]);
499
500    /// Reader defaults for the GitHub Markdown dialect (`markdown_github`): the GitHub construct set —
501    /// strikeout, task lists, pipe tables, footnotes, bare-URI autolinking, emoji, alerts, backtick and
502    /// fenced code, auto identifiers in both forms, intra-word underscores, lists that open without a
503    /// preceding blank line, and the escaping/heading-spacing leniencies.
504    pub const MARKDOWN_GITHUB_READ: Extensions = Extensions::from_list(&[
505        Extension::Alerts,
506        Extension::AllSymbolsEscapable,
507        Extension::AutoIdentifiers,
508        Extension::Autolink,
509        Extension::BacktickCodeBlocks,
510        Extension::Emoji,
511        Extension::FencedCodeBlocks,
512        Extension::Footnotes,
513        Extension::GfmAutoIdentifiers,
514        Extension::IntrawordUnderscores,
515        Extension::ListsWithoutPrecedingBlankline,
516        Extension::PipeTables,
517        Extension::RawHtml,
518        Extension::ShortcutReferenceLinks,
519        Extension::SpaceInAtxHeader,
520        Extension::Strikeout,
521        Extension::TaskLists,
522    ]);
523
524    /// Reader defaults for the PHP Markdown Extra dialect (`markdown_phpextra`): abbreviations,
525    /// definition lists, fenced code, footnotes, header and link attributes, intra-word underscores,
526    /// the `data-markdown` div marker, pipe tables, raw HTML, and the reference-link forms.
527    pub const MARKDOWN_PHPEXTRA_READ: Extensions = Extensions::from_list(&[
528        Extension::Abbreviations,
529        Extension::DefinitionLists,
530        Extension::FencedCodeBlocks,
531        Extension::Footnotes,
532        Extension::HeaderAttributes,
533        Extension::IntrawordUnderscores,
534        Extension::LinkAttributes,
535        Extension::MarkdownAttribute,
536        Extension::PipeTables,
537        Extension::RawHtml,
538        Extension::ShortcutReferenceLinks,
539        Extension::SpacedReferenceLinks,
540    ]);
541
542    /// Reader defaults for the `MultiMarkdown` dialect (`markdown_mmd`): auto identifiers, backtick
543    /// code, definition lists, footnotes, implicit figures and header references, intra-word
544    /// underscores, the `data-markdown` div marker, `MultiMarkdown`'s trailing `[id]` header
545    /// identifiers, its link-attribute and title-block forms, pipe tables, raw HTML and raw attributes,
546    /// single-character sub/superscripts, the reference-link forms, sub/superscript spans, dollar math,
547    /// and the double-backslash math delimiters.
548    pub const MARKDOWN_MMD_READ: Extensions = Extensions::from_list(&[
549        Extension::AllSymbolsEscapable,
550        Extension::AutoIdentifiers,
551        Extension::BacktickCodeBlocks,
552        Extension::DefinitionLists,
553        Extension::Footnotes,
554        Extension::ImplicitFigures,
555        Extension::ImplicitHeaderReferences,
556        Extension::IntrawordUnderscores,
557        Extension::MarkdownAttribute,
558        Extension::MmdHeaderIdentifiers,
559        Extension::MmdLinkAttributes,
560        Extension::MmdTitleBlock,
561        Extension::PipeTables,
562        Extension::RawAttribute,
563        Extension::RawHtml,
564        Extension::ShortSubsuperscripts,
565        Extension::ShortcutReferenceLinks,
566        Extension::SpacedReferenceLinks,
567        Extension::Subscript,
568        Extension::Superscript,
569        Extension::TexMathDollars,
570        Extension::TexMathDoubleBackslash,
571    ]);
572}
573
#[cfg(test)]
mod tests {
    use super::{Extension, Extensions, presets};

    #[test]
    fn words_cover_every_variant() {
        // No variant may map to a word index past the end of the backing array.
        assert!(
            Extension::ALL
                .iter()
                .all(|&ext| (ext as usize) / super::WORD_BITS < super::WORDS)
        );
    }

    #[test]
    fn insert_remove_contains_round_trip() {
        let mut flags = Extensions::empty();
        assert!(flags.is_empty());
        assert!(!flags.contains(Extension::Footnotes));

        flags.insert(Extension::Footnotes);
        assert!(flags.contains(Extension::Footnotes));
        assert!(!flags.is_empty());

        flags.remove(Extension::Footnotes);
        assert!(!flags.contains(Extension::Footnotes));
        assert!(flags.is_empty());
    }

    #[test]
    fn from_list_and_iter_follow_declaration_order() {
        // The arguments are given out of order on purpose: `iter` must still yield in `ALL` order.
        let members: Vec<Extension> =
            Extensions::from_list(&[Extension::PipeTables, Extension::Smart])
                .iter()
                .collect();
        assert_eq!(members, [Extension::Smart, Extension::PipeTables]);
    }

    #[test]
    fn commonmark_preset_is_empty_gfm_is_not() {
        assert!(presets::COMMONMARK.is_empty());
        for expected in [
            Extension::Strikeout,
            Extension::TaskLists,
            Extension::PipeTables,
        ] {
            assert!(presets::GFM.contains(expected));
        }
        // Subscript and superscript belong to the broader Markdown dialects, not to GFM.
        for absent in [Extension::Subscript, Extension::Superscript] {
            assert!(!presets::GFM.contains(absent));
        }
    }

    #[test]
    fn markdown_and_commonmark_x_presets_are_broad() {
        for expected in [
            Extension::DefinitionLists,
            Extension::YamlMetadataBlock,
            Extension::Smart,
        ] {
            assert!(presets::MARKDOWN.contains(expected));
        }
        for expected in [Extension::FencedDivs, Extension::Attributes] {
            assert!(presets::COMMONMARK_X.contains(expected));
        }
        // Strict CommonMark keeps none of them.
        assert!(presets::COMMONMARK.is_empty());
    }

    #[test]
    fn code_and_math_surface_variants_round_trip_and_seed_presets() {
        for token in ["fenced_code_blocks", "backtick_code_blocks", "tex_math_gfm"] {
            let ext = Extension::from_name(token).expect("a declared variant");
            assert_eq!(ext.name(), token);
        }

        // The Markdown dialect fences code in both the tilde and the backtick form.
        let markdown = presets::MARKDOWN;
        assert!(markdown.contains(Extension::FencedCodeBlocks));
        assert!(markdown.contains(Extension::BacktickCodeBlocks));

        // GFM fences with backticks and has its own math surface, but no tilde-fence form.
        let gfm = presets::GFM;
        assert!(gfm.contains(Extension::BacktickCodeBlocks));
        assert!(gfm.contains(Extension::TexMathGfm));
        assert!(!gfm.contains(Extension::FencedCodeBlocks));
    }

    #[test]
    fn names_are_stable() {
        let expected = [
            (Extension::Footnotes, "footnotes"),
            (Extension::Autolink, "autolink_bare_uris"),
            (Extension::HardLineBreaks, "hard_line_breaks"),
            (Extension::RawHtml, "raw_html"),
        ];
        for (ext, identifier) in expected {
            assert_eq!(ext.name(), identifier);
        }
    }

    #[test]
    fn from_name_round_trips_every_variant() {
        for &ext in Extension::ALL {
            assert_eq!(Extension::from_name(ext.name()), Some(ext));
        }
        for unknown in ["not_an_extension", ""] {
            assert_eq!(Extension::from_name(unknown), None);
        }
    }

    #[test]
    fn union_combines_both_sides() {
        let left = Extensions::from_list(&[Extension::Strikeout]);
        let right = Extensions::from_list(&[Extension::Subscript]);
        let both = left.union(right);

        assert!(both.contains(Extension::Strikeout));
        assert!(both.contains(Extension::Subscript));
        assert!(!both.contains(Extension::Superscript));
        // The empty set is the identity for `union`.
        assert_eq!(left.union(Extensions::empty()), left);
    }
}