carta_core/extensions.rs
//! Format extensions: the set of optional syntax features a reader or writer may honor.
//!
//! [`Extension`] is one named feature; [`Extensions`] is a deterministic, allocation-free set of them
//! backed by a fixed array of 64-bit words. The word count is derived from the number of variants, so
//! the set has no fixed ceiling (such as the 128 variants a single `u128` mask would cap it at) and
//! scales to the full extension set. [`presets`] holds the per-flavor sets; strict `CommonMark` is
//! the empty set.
6
/// Declares the [`Extension`] enum from a `Variant => "name"` list and derives the
/// `ALL`/`COUNT`/`name`/`from_name` metadata from that same list, so the variant list remains the
/// single source of truth for the bitset sizing in [`Extensions`].
macro_rules! define_extensions {
    ($($(#[$meta:meta])* $ident:ident => $label:literal),+ $(,)?) => {
        /// A single format extension. A variant's bit index in [`Extensions`] is its position in
        /// [`Extension::ALL`].
        #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
        #[non_exhaustive]
        pub enum Extension {
            $(
                $(#[$meta])*
                $ident,
            )+
        }

        impl Extension {
            /// Every extension, in declaration order.
            pub const ALL: &'static [Extension] = &[$(Extension::$ident,)+];
            /// The number of distinct extensions.
            pub const COUNT: usize = Self::ALL.len();

            /// The extension's identifier (e.g. `"footnotes"`).
            #[must_use]
            pub const fn name(self) -> &'static str {
                match self {
                    $(Extension::$ident => $label,)+
                }
            }

            /// Looks up the extension whose identifier is `name`; `None` when no extension uses
            /// that identifier.
            #[must_use]
            pub fn from_name(name: &str) -> Option<Extension> {
                // Scanning `ALL` in declaration order yields the first variant carrying `name`.
                Self::ALL
                    .iter()
                    .copied()
                    .find(|candidate| candidate.name() == name)
            }
        }
    };
}
37
// The extension list. Declaration order is significant: a variant's position here is its position
// in `Extension::ALL`, and therefore its bit index in `Extensions`. The string on the right is the
// identifier returned by `Extension::name` and accepted by `Extension::from_name`.
define_extensions! {
    /// Straight quotes, `...`, `--`, and `---` become curly quotes, an ellipsis, and en/em dashes.
    Smart => "smart",
    /// `~~text~~` strikeout spans.
    Strikeout => "strikeout",
    /// `^text^` superscript spans.
    Superscript => "superscript",
    /// `~text~` subscript spans.
    Subscript => "subscript",
    /// Pipe tables: `|`-separated cells with a delimiter row carrying the column alignments.
    PipeTables => "pipe_tables",
    /// `[^label]` footnote references with separately defined note bodies.
    Footnotes => "footnotes",
    /// `- [ ]` / `- [x]` task-list items.
    TaskLists => "task_lists",
    /// A bare absolute URI or `www.` address in running text becomes a link.
    Autolink => "autolink_bare_uris",
    /// `$…$` inline and `$$…$$` display math.
    TexMathDollars => "tex_math_dollars",
    /// `:::`-fenced divs carrying an attribute block or a bare class name.
    FencedDivs => "fenced_divs",
    /// `[text]{.class}` spans: bracketed text followed by an attribute block.
    BracketedSpans => "bracketed_spans",
    /// Every newline within a paragraph is a hard line break.
    HardLineBreaks => "hard_line_breaks",
    /// Raw HTML tags and blocks are carried through rather than treated as text.
    RawHtml => "raw_html",
    /// A `{#id .class key=val}` attribute block on a header line.
    HeaderAttributes => "header_attributes",
    /// An attribute block on a fenced code block's opening line.
    FencedCodeAttributes => "fenced_code_attributes",
    /// An attribute block after an inline code span.
    InlineCodeAttributes => "inline_code_attributes",
    /// An attribute block after a link or image.
    LinkAttributes => "link_attributes",
    /// The combined attribute toggle: the attribute syntaxes enabled as a group.
    Attributes => "attributes",
    /// Definition lists: a term line followed by `:`-marked definition blocks.
    DefinitionLists => "definition_lists",
    /// Grid tables drawn with `+---+` cell borders.
    GridTables => "grid_tables",
    /// Multiline tables, whose cells may continue across several source lines.
    MultilineTables => "multiline_tables",
    /// Simple tables: columns aligned under a dashed header line.
    SimpleTables => "simple_tables",
    /// A `Table:` (or bare `:`) caption line attached to a table.
    TableCaptions => "table_captions",
    /// `|`-prefixed line blocks, preserving the source's line divisions.
    LineBlocks => "line_blocks",
    /// Ordered-list markers beyond decimal numbers: letters, roman numerals, and `)` delimiters.
    FancyLists => "fancy_lists",
    /// `(@label)` example lists, numbered sequentially across the whole document.
    ExampleLists => "example_lists",
    /// An ordered list starts at the number its first marker carries rather than 1.
    Startnum => "startnum",
    /// A `---`-delimited YAML metadata block.
    YamlMetadataBlock => "yaml_metadata_block",
    /// A `%`-prefixed title/author/date block at the top of the document.
    PandocTitleBlock => "pandoc_title_block",
    /// A header without an explicit identifier gets one derived from its text.
    AutoIdentifiers => "auto_identifiers",
    /// Derived header identifiers use the `GitHub` slug form: lowercased, punctuation dropped,
    /// spaces to hyphens.
    GfmAutoIdentifiers => "gfm_auto_identifiers",
    /// Fold a derived identifier down to ASCII, dropping diacritics before the slug is formed.
    AsciiIdentifiers => "ascii_identifiers",
    /// A header's explicit identifier is written in `MultiMarkdown`'s trailing `[id]` form rather
    /// than the `{#id}` attribute block.
    MmdHeaderIdentifiers => "mmd_header_identifiers",
    /// A header's own text works as a reference-link label for that header.
    ImplicitHeaderReferences => "implicit_header_references",
    /// A bare image with a caption becomes a figure.
    ImplicitFigures => "implicit_figures",
    /// Raw passthrough: `` `code`{=fmt} `` inline and ```` ```{=fmt} ```` fenced blocks.
    RawAttribute => "raw_attribute",
    /// A `^[…]` inline note expands to a footnote in place.
    InlineNotes => "inline_notes",
    /// A block-level `<div>` becomes a `Div`, with Markdown parsed inside.
    NativeDivs => "native_divs",
    /// An inline `<span>` becomes a `Span`, with Markdown parsed inside.
    NativeSpans => "native_spans",
    /// Markdown is parsed inside block-level HTML, which is otherwise split tag-by-tag.
    MarkdownInHtmlBlocks => "markdown_in_html_blocks",
    /// A `<div>`/`<span>` emitted for a div/span carries a `data-markdown="1"` marker so its
    /// contents are still parsed as Markdown; this also forces a div with no native syntax into an
    /// HTML wrap.
    MarkdownAttribute => "markdown_attribute",
    /// Inline raw `TeX` (`\command{…}`, `\begin{env}…\end{env}`) passes through verbatim.
    RawTex => "raw_tex",
    /// `[@key]` / `@key` citation references.
    Citations => "citations",
    /// An attribute block on a table's caption line attaches to the table.
    TableAttributes => "table_attributes",
    /// A blank line is required before a blockquote, so one never interrupts a paragraph.
    BlankBeforeBlockquote => "blank_before_blockquote",
    /// A blank line is required before a header, so one never interrupts a paragraph.
    BlankBeforeHeader => "blank_before_header",
    /// `==text==` highlight spans.
    Mark => "mark",
    /// `:name:` emoji shortcodes.
    Emoji => "emoji",
    /// `> [!NOTE]`-style admonition blockquotes become classed divs.
    Alerts => "alerts",
    /// `\(…\)` inline and `\[…\]` display math delimiters.
    TexMathSingleBackslash => "tex_math_single_backslash",
    /// `\\(…\\)` inline and `\\[…\\]` display math delimiters.
    TexMathDoubleBackslash => "tex_math_double_backslash",
    /// Tilde-fenced (`~~~`) code blocks; with no fence form available, code is written in the
    /// four-space indented form.
    FencedCodeBlocks => "fenced_code_blocks",
    /// Backtick-fenced code blocks.
    BacktickCodeBlocks => "backtick_code_blocks",
    /// The `GitHub` math surface: inline `` $`…`$ `` and a ```` ```math ```` display block, as
    /// opposed to the `$…$`/`$$…$$` dollar form.
    TexMathGfm => "tex_math_gfm",
    /// A backslash at a line's end is a hard line break, written as a trailing `\`; without it the
    /// writer falls back to two trailing spaces.
    EscapedLineBreaks => "escaped_line_breaks",
    /// An underscore inside a word opens no emphasis, so the writer leaves intra-word `_` literal;
    /// without it every `_` is escaped so a strict reader cannot start emphasis mid-word.
    IntrawordUnderscores => "intraword_underscores",
    /// A list may begin directly after a paragraph line with no intervening blank line,
    /// interrupting it; without it a list marker on the line after a paragraph folds into that
    /// paragraph.
    ListsWithoutPrecedingBlankline => "lists_without_preceding_blankline",
    /// `*[SHY]: Soft hyphen` abbreviation definitions, applied to later occurrences of the term.
    Abbreviations => "abbreviations",
    /// A backslash escapes any symbol, not only the ASCII-punctuation subset.
    AllSymbolsEscapable => "all_symbols_escapable",
    /// A backslash before `<` or `>` escapes the angle bracket.
    AngleBracketsEscapable => "angle_brackets_escapable",
    /// Line breaks between East Asian wide characters carry no width and are dropped.
    EastAsianLineBreaks => "east_asian_line_breaks",
    /// An indented code block requires four spaces of indentation rather than one tab stop.
    // NOTE(review): pandoc documents an extension of this name as governing the indentation of
    // list-item continuation paragraphs rather than code blocks — confirm which behavior is
    // intended here before the engine starts branching on it.
    FourSpaceRule => "four_space_rule",
    /// Typographic conventions of the Project Gutenberg style for plain-text output.
    Gutenberg => "gutenberg",
    /// Soft line breaks within a paragraph are discarded rather than kept as spaces.
    IgnoreLineBreaks => "ignore_line_breaks",
    /// User-defined `LaTeX` macros are expanded in math and raw `TeX`.
    LatexMacros => "latex_macros",
    /// Bird-track (`> `) literate-program code sections.
    LiterateHaskell => "literate_haskell",
    /// An attribute block following a link or image in the `MultiMarkdown` position.
    MmdLinkAttributes => "mmd_link_attributes",
    /// A `MultiMarkdown` metadata block at the top of the document.
    MmdTitleBlock => "mmd_title_block",
    /// `-` and `--` map to en/em dashes under the older dash convention.
    OldDashes => "old_dashes",
    /// A raw block or inline may be written directly as Markdown for round-tripping.
    RawMarkdown => "raw_markdown",
    /// Relative paths in links and images are rebased onto the source file's location.
    RebaseRelativePaths => "rebase_relative_paths",
    /// `~x` / `^x` subscript and superscript bind only the single following character.
    ShortSubsuperscripts => "short_subsuperscripts",
    /// A defined label may be referenced by `[label]` alone, with no following `[]` or `(…)`.
    ShortcutReferenceLinks => "shortcut_reference_links",
    /// An ATX header requires a space between the opening `#` run and the heading text.
    SpaceInAtxHeader => "space_in_atx_header",
    /// A reference link's label and its following `[id]` may be separated by whitespace.
    SpacedReferenceLinks => "spaced_reference_links",
    /// `[[target|title]]` wiki links, with the title following the pipe.
    WikilinksTitleAfterPipe => "wikilinks_title_after_pipe",
    /// `[[title|target]]` wiki links, with the title preceding the pipe.
    WikilinksTitleBeforePipe => "wikilinks_title_before_pipe",
}
204
/// Number of bits in one backing word of [`Extensions`].
const WORD_BITS: usize = u64::BITS as usize;
/// Number of 64-bit words in the backing array: [`Extension::COUNT`] bits, rounded up to whole
/// words so that every variant has a bit of its own.
const WORDS: usize = Extension::COUNT.div_ceil(WORD_BITS);

// The bitset indexing in `from_list` is sound only while each variant's discriminant equals its
// position in `ALL` (so every `ext as usize` lands in `0..COUNT`). The macro emits no explicit
// discriminants, so this holds — asserted at compile time here, turning a future edit that breaks
// contiguity into a build failure rather than an out-of-bounds index. `contains`, `insert`, and
// `remove` derive their bit position from the same `ext as usize` cast.
#[allow(clippy::indexing_slicing)]
const _: () = {
    // A `while` loop, since `for` loops are not available during const evaluation.
    let mut i = 0;
    while i < Extension::ALL.len() {
        assert!(Extension::ALL[i] as usize == i);
        i += 1;
    }
};
220
221/// A deterministic, allocation-free set of [`Extension`]s, backed by a fixed array of 64-bit words
222/// indexed by each variant's position in [`Extension::ALL`].
223#[derive(Clone, Copy, PartialEq, Eq)]
224pub struct Extensions([u64; WORDS]);
225
226impl Default for Extensions {
227 fn default() -> Self {
228 Self::empty()
229 }
230}
231
232impl Extensions {
233 /// The empty set (strict `CommonMark`).
234 #[must_use]
235 pub const fn empty() -> Self {
236 Self([0; WORDS])
237 }
238
239 /// The set containing exactly `list`. Const so presets are `const` values.
240 #[must_use]
241 // Const indexing: contiguity (asserted above) gives `bit < COUNT`, so `bit / WORD_BITS < WORDS`;
242 // `i < list.len()`. Both indices are in bounds, and slice `get` is not usable across all const
243 // contexts on the pinned toolchain.
244 #[allow(clippy::indexing_slicing)]
245 pub const fn from_list(list: &[Extension]) -> Self {
246 let mut words = [0u64; WORDS];
247 let mut i = 0;
248 while i < list.len() {
249 let bit = list[i] as usize;
250 words[bit / WORD_BITS] |= 1u64 << (bit % WORD_BITS);
251 i += 1;
252 }
253 Self(words)
254 }
255
256 /// Whether `ext` is in the set.
257 #[must_use]
258 pub fn contains(self, ext: Extension) -> bool {
259 let bit = ext as usize;
260 self.0
261 .get(bit / WORD_BITS)
262 .is_some_and(|word| (word >> (bit % WORD_BITS)) & 1 == 1)
263 }
264
265 /// Adds `ext` to the set.
266 pub fn insert(&mut self, ext: Extension) {
267 let bit = ext as usize;
268 if let Some(word) = self.0.get_mut(bit / WORD_BITS) {
269 *word |= 1u64 << (bit % WORD_BITS);
270 }
271 }
272
273 /// Removes `ext` from the set.
274 pub fn remove(&mut self, ext: Extension) {
275 let bit = ext as usize;
276 if let Some(word) = self.0.get_mut(bit / WORD_BITS) {
277 *word &= !(1u64 << (bit % WORD_BITS));
278 }
279 }
280
281 /// The union of this set and `other`.
282 #[must_use]
283 pub fn union(self, other: Extensions) -> Extensions {
284 let mut words = self.0;
285 for (word, &add) in words.iter_mut().zip(other.0.iter()) {
286 *word |= add;
287 }
288 Extensions(words)
289 }
290
291 /// Whether the set is empty.
292 #[must_use]
293 pub fn is_empty(self) -> bool {
294 self.0.iter().all(|&word| word == 0)
295 }
296
297 /// The set's extensions in [`Extension::ALL`] (deterministic) order.
298 pub fn iter(self) -> impl Iterator<Item = Extension> {
299 Extension::ALL
300 .iter()
301 .copied()
302 .filter(move |&ext| self.contains(ext))
303 }
304}
305
306impl core::fmt::Debug for Extensions {
307 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
308 f.debug_set()
309 .entries(self.iter().map(Extension::name))
310 .finish()
311 }
312}
313
/// Per-flavor extension sets.
///
/// Each preset is a `const` built with [`Extensions::from_list`]; the order in which a list names
/// its extensions does not affect the resulting set.
pub mod presets {
    use super::{Extension, Extensions};

    /// Strict `CommonMark`: no extensions.
    pub const COMMONMARK: Extensions = Extensions::empty();

    /// `GitHub`-Flavored Markdown.
    pub const GFM: Extensions = Extensions::from_list(&[
        Extension::Strikeout,
        Extension::PipeTables,
        Extension::BacktickCodeBlocks,
        Extension::TaskLists,
        Extension::Autolink,
        Extension::Footnotes,
        Extension::TexMathDollars,
        Extension::TexMathGfm,
        Extension::GfmAutoIdentifiers,
        Extension::RawHtml,
        Extension::Emoji,
        Extension::Alerts,
    ]);

    /// The `commonmark_x` dialect: `CommonMark` with a broad set of inline and block extensions
    /// enabled. `backtick_code_blocks` is additionally carried because the shared Markdown engine
    /// fences code on that flag, which `CommonMark` does natively.
    pub const COMMONMARK_X: Extensions = Extensions::from_list(&[
        Extension::Smart,
        Extension::Strikeout,
        Extension::Superscript,
        Extension::Subscript,
        Extension::PipeTables,
        Extension::Footnotes,
        Extension::TaskLists,
        Extension::TexMathDollars,
        Extension::FencedDivs,
        Extension::BracketedSpans,
        Extension::BacktickCodeBlocks,
        Extension::RawHtml,
        Extension::RawAttribute,
        Extension::Attributes,
        Extension::HeaderAttributes,
        Extension::FencedCodeAttributes,
        Extension::InlineCodeAttributes,
        Extension::LinkAttributes,
        Extension::DefinitionLists,
        Extension::FancyLists,
        Extension::GfmAutoIdentifiers,
        Extension::ImplicitHeaderReferences,
        Extension::Emoji,
        Extension::Alerts,
    ]);

    /// The extended Markdown dialect: the broad default extension set.
    pub const MARKDOWN: Extensions = Extensions::from_list(&[
        Extension::AllSymbolsEscapable,
        Extension::Smart,
        Extension::Strikeout,
        Extension::Superscript,
        Extension::Subscript,
        Extension::PipeTables,
        Extension::Footnotes,
        Extension::TaskLists,
        Extension::TexMathDollars,
        Extension::FencedDivs,
        Extension::BracketedSpans,
        Extension::RawHtml,
        Extension::HeaderAttributes,
        Extension::FencedCodeAttributes,
        Extension::FencedCodeBlocks,
        Extension::BacktickCodeBlocks,
        Extension::InlineCodeAttributes,
        Extension::LinkAttributes,
        Extension::DefinitionLists,
        Extension::GridTables,
        Extension::MultilineTables,
        Extension::SimpleTables,
        Extension::TableCaptions,
        Extension::LineBlocks,
        Extension::FancyLists,
        Extension::ExampleLists,
        Extension::Startnum,
        Extension::YamlMetadataBlock,
        Extension::PandocTitleBlock,
        Extension::AutoIdentifiers,
        Extension::ImplicitHeaderReferences,
        Extension::ImplicitFigures,
        Extension::RawAttribute,
        Extension::InlineNotes,
        Extension::NativeDivs,
        Extension::NativeSpans,
        Extension::MarkdownInHtmlBlocks,
        Extension::RawTex,
        Extension::Citations,
        Extension::TableAttributes,
        Extension::BlankBeforeBlockquote,
        Extension::BlankBeforeHeader,
        Extension::EscapedLineBreaks,
        Extension::IntrawordUnderscores,
        Extension::SpaceInAtxHeader,
    ]);

    /// The legacy GitHub Markdown dialect (`markdown_github`). The set is restricted to the
    /// variants that exist and affect writer output: backtick- and tilde-fenced code, pipe tables,
    /// strikeout, task lists, footnotes, bare-URI autolinking, raw HTML, auto identifiers in both
    /// forms, intra-word underscores, emoji, and alerts. It has no smart typography, math, spans,
    /// or fenced divs.
    pub const MARKDOWN_GITHUB: Extensions = Extensions::from_list(&[
        Extension::Strikeout,
        Extension::PipeTables,
        Extension::Footnotes,
        Extension::TaskLists,
        Extension::Autolink,
        Extension::RawHtml,
        Extension::FencedCodeBlocks,
        Extension::BacktickCodeBlocks,
        Extension::AutoIdentifiers,
        Extension::GfmAutoIdentifiers,
        Extension::Emoji,
        Extension::Alerts,
        Extension::IntrawordUnderscores,
    ]);

    /// The PHP Markdown Extra dialect (`markdown_phpextra`). The set is restricted to the variants
    /// that exist and affect writer output: definition lists, fenced (tilde) code blocks,
    /// footnotes, header and link attributes, intra-word underscores, the `data-markdown` div
    /// marker, pipe tables, and raw HTML. It has no backtick code fences, so code fences are
    /// written with tildes, and no smart typography, math, strikeout, spans, or fenced divs.
    pub const MARKDOWN_PHPEXTRA: Extensions = Extensions::from_list(&[
        Extension::DefinitionLists,
        Extension::FencedCodeBlocks,
        Extension::Footnotes,
        Extension::HeaderAttributes,
        Extension::IntrawordUnderscores,
        Extension::LinkAttributes,
        Extension::MarkdownAttribute,
        Extension::PipeTables,
        Extension::RawHtml,
    ]);

    /// The `MultiMarkdown` dialect (`markdown_mmd`). The set is restricted to the variants that
    /// exist and affect writer output: backtick-fenced code, definition lists, footnotes, pipe
    /// tables, implicit figures and header references, intra-word underscores, sub/superscript,
    /// dollar math, raw HTML and raw attributes, auto identifiers, `MultiMarkdown`'s trailing
    /// `[id]` header identifiers, and the `data-markdown` div marker. It has no header attribute
    /// blocks, strikeout, task lists, smart typography, spans, or fenced divs. With
    /// `tex_math_dollars` on and taking precedence, a `tex_math_double_backslash` surface would not
    /// change this dialect's writer output, so it is left out of the preset and math is emitted as
    /// `$…$`.
    pub const MARKDOWN_MMD: Extensions = Extensions::from_list(&[
        Extension::AutoIdentifiers,
        Extension::BacktickCodeBlocks,
        Extension::DefinitionLists,
        Extension::Footnotes,
        Extension::ImplicitFigures,
        Extension::ImplicitHeaderReferences,
        Extension::IntrawordUnderscores,
        Extension::MarkdownAttribute,
        Extension::MmdHeaderIdentifiers,
        Extension::PipeTables,
        Extension::RawAttribute,
        Extension::RawHtml,
        Extension::Subscript,
        Extension::Superscript,
        Extension::TexMathDollars,
    ]);

    /// The original Markdown dialect (`markdown_strict`). The set is restricted to the variants
    /// that exist and affect writer output — only raw HTML. With no fenced or backtick code,
    /// tables, definition lists, footnotes, task lists, math, or any attribute syntax, every richer
    /// construct falls back to indented code, an HTML block, or a raw glyph. Lacking
    /// `intraword_underscores`, every `_` is escaped; lacking `pipe_tables`, a literal `|` is left
    /// unescaped.
    pub const MARKDOWN_STRICT: Extensions = Extensions::from_list(&[Extension::RawHtml]);

    // The reader default sets below are broader than the writer presets above: a reader enables every
    // construct the dialect can parse, whereas the writer presets carry only the extensions that shape
    // the emitted text. Some entries name constructs the shared Markdown engine does not yet branch on;
    // they are recorded so the dialect's default surface is complete and takes effect once modeled.

    /// Reader defaults for the original Markdown dialect (`markdown_strict`): only raw HTML, plus the
    /// shortcut and spaced reference-link forms.
    pub const MARKDOWN_STRICT_READ: Extensions = Extensions::from_list(&[
        Extension::RawHtml,
        Extension::ShortcutReferenceLinks,
        Extension::SpacedReferenceLinks,
    ]);

    /// Reader defaults for the GitHub Markdown dialect (`markdown_github`): the GitHub construct
    /// set — strikeout, task lists, pipe tables, footnotes, bare-URI autolinking, emoji, alerts,
    /// backtick and fenced code, raw HTML, auto identifiers in both forms, intra-word underscores,
    /// lists that open without a preceding blank line, shortcut reference links, backslash escaping
    /// of any symbol, and the required space in ATX headers.
    pub const MARKDOWN_GITHUB_READ: Extensions = Extensions::from_list(&[
        Extension::Alerts,
        Extension::AllSymbolsEscapable,
        Extension::AutoIdentifiers,
        Extension::Autolink,
        Extension::BacktickCodeBlocks,
        Extension::Emoji,
        Extension::FencedCodeBlocks,
        Extension::Footnotes,
        Extension::GfmAutoIdentifiers,
        Extension::IntrawordUnderscores,
        Extension::ListsWithoutPrecedingBlankline,
        Extension::PipeTables,
        Extension::RawHtml,
        Extension::ShortcutReferenceLinks,
        Extension::SpaceInAtxHeader,
        Extension::Strikeout,
        Extension::TaskLists,
    ]);

    /// Reader defaults for the PHP Markdown Extra dialect (`markdown_phpextra`): abbreviations,
    /// definition lists, fenced code, footnotes, header and link attributes, intra-word underscores,
    /// the `data-markdown` div marker, pipe tables, raw HTML, and the reference-link forms.
    pub const MARKDOWN_PHPEXTRA_READ: Extensions = Extensions::from_list(&[
        Extension::Abbreviations,
        Extension::DefinitionLists,
        Extension::FencedCodeBlocks,
        Extension::Footnotes,
        Extension::HeaderAttributes,
        Extension::IntrawordUnderscores,
        Extension::LinkAttributes,
        Extension::MarkdownAttribute,
        Extension::PipeTables,
        Extension::RawHtml,
        Extension::ShortcutReferenceLinks,
        Extension::SpacedReferenceLinks,
    ]);

    /// Reader defaults for the `MultiMarkdown` dialect (`markdown_mmd`): backslash escaping of any
    /// symbol, auto identifiers, backtick code, definition lists, footnotes, implicit figures and
    /// header references, intra-word underscores, the `data-markdown` div marker,
    /// `MultiMarkdown`'s trailing `[id]` header identifiers, its link-attribute and title-block
    /// forms, pipe tables, raw HTML and raw attributes, single-character sub/superscripts, the
    /// reference-link forms, sub/superscript spans, dollar math, and the double-backslash math
    /// delimiters.
    pub const MARKDOWN_MMD_READ: Extensions = Extensions::from_list(&[
        Extension::AllSymbolsEscapable,
        Extension::AutoIdentifiers,
        Extension::BacktickCodeBlocks,
        Extension::DefinitionLists,
        Extension::Footnotes,
        Extension::ImplicitFigures,
        Extension::ImplicitHeaderReferences,
        Extension::IntrawordUnderscores,
        Extension::MarkdownAttribute,
        Extension::MmdHeaderIdentifiers,
        Extension::MmdLinkAttributes,
        Extension::MmdTitleBlock,
        Extension::PipeTables,
        Extension::RawAttribute,
        Extension::RawHtml,
        Extension::ShortSubsuperscripts,
        Extension::ShortcutReferenceLinks,
        Extension::SpacedReferenceLinks,
        Extension::Subscript,
        Extension::Superscript,
        Extension::TexMathDollars,
        Extension::TexMathDoubleBackslash,
    ]);
}
573
/// Unit tests for the extension metadata, the bitset operations, and the shipped presets.
#[cfg(test)]
mod tests {
    use super::{Extension, Extensions, presets};

    #[test]
    fn words_cover_every_variant() {
        // Every variant's bit index must land inside the backing array.
        for ext in Extension::ALL {
            assert!((*ext as usize) / super::WORD_BITS < super::WORDS);
        }
    }

    #[test]
    fn every_variant_occupies_its_own_bit() {
        // A singleton set must report exactly its one member, whichever backing word holds the bit.
        for &ext in Extension::ALL {
            let single = Extensions::from_list(&[ext]);
            let members: Vec<Extension> = single.iter().collect();
            assert_eq!(members, vec![ext]);
        }
        // The full set yields every variant exactly once, in declaration order.
        let everything: Vec<Extension> = Extensions::from_list(Extension::ALL).iter().collect();
        assert_eq!(everything, Extension::ALL.to_vec());
    }

    #[test]
    fn default_is_the_empty_set() {
        assert_eq!(Extensions::default(), Extensions::empty());
        assert!(Extensions::default().is_empty());
    }

    #[test]
    fn insert_remove_contains_round_trip() {
        let mut set = Extensions::empty();
        assert!(set.is_empty());
        assert!(!set.contains(Extension::Footnotes));
        set.insert(Extension::Footnotes);
        assert!(set.contains(Extension::Footnotes));
        assert!(!set.is_empty());
        set.remove(Extension::Footnotes);
        assert!(!set.contains(Extension::Footnotes));
        assert!(set.is_empty());
    }

    #[test]
    fn insert_and_remove_are_idempotent() {
        let only_smart = Extensions::from_list(&[Extension::Smart]);
        let mut set = only_smart;
        // Re-inserting a member and removing a non-member both leave the set untouched.
        set.insert(Extension::Smart);
        assert_eq!(set, only_smart);
        set.remove(Extension::Footnotes);
        assert_eq!(set, only_smart);
        // `from_list` tolerates duplicate entries.
        assert_eq!(
            Extensions::from_list(&[Extension::Smart, Extension::Smart]),
            only_smart
        );
    }

    #[test]
    fn from_list_and_iter_follow_declaration_order() {
        let set = Extensions::from_list(&[Extension::PipeTables, Extension::Smart]);
        let collected: Vec<Extension> = set.iter().collect();
        // `iter` yields in `ALL` order, regardless of `from_list` argument order.
        assert_eq!(collected, vec![Extension::Smart, Extension::PipeTables]);
    }

    #[test]
    fn debug_lists_names_in_declaration_order() {
        let set = Extensions::from_list(&[Extension::PipeTables, Extension::Smart]);
        assert_eq!(format!("{set:?}"), r#"{"smart", "pipe_tables"}"#);
        assert_eq!(format!("{:?}", Extensions::empty()), "{}");
    }

    #[test]
    fn commonmark_preset_is_empty_gfm_is_not() {
        assert!(presets::COMMONMARK.is_empty());
        assert!(presets::GFM.contains(Extension::Strikeout));
        assert!(presets::GFM.contains(Extension::TaskLists));
        assert!(presets::GFM.contains(Extension::PipeTables));
        // GFM has no subscript/superscript; those belong to the broader Markdown dialects.
        assert!(!presets::GFM.contains(Extension::Subscript));
        assert!(!presets::GFM.contains(Extension::Superscript));
    }

    #[test]
    fn markdown_and_commonmark_x_presets_are_broad() {
        assert!(presets::MARKDOWN.contains(Extension::DefinitionLists));
        assert!(presets::MARKDOWN.contains(Extension::YamlMetadataBlock));
        assert!(presets::MARKDOWN.contains(Extension::Smart));
        assert!(presets::COMMONMARK_X.contains(Extension::FencedDivs));
        assert!(presets::COMMONMARK_X.contains(Extension::Attributes));
        // The strict CommonMark dialect keeps none of these.
        assert!(presets::COMMONMARK.is_empty());
    }

    #[test]
    fn reader_defaults_cover_their_writer_presets() {
        // Each reader default set is documented as broader than the matching writer preset, so
        // folding the writer preset into it must add nothing.
        let pairs = [
            (presets::MARKDOWN_STRICT, presets::MARKDOWN_STRICT_READ),
            (presets::MARKDOWN_GITHUB, presets::MARKDOWN_GITHUB_READ),
            (presets::MARKDOWN_PHPEXTRA, presets::MARKDOWN_PHPEXTRA_READ),
            (presets::MARKDOWN_MMD, presets::MARKDOWN_MMD_READ),
        ];
        for (writer, reader) in pairs {
            assert_eq!(writer.union(reader), reader);
        }
    }

    #[test]
    fn reader_defaults_add_reader_only_constructs() {
        assert!(presets::MARKDOWN_STRICT_READ.contains(Extension::ShortcutReferenceLinks));
        assert!(!presets::MARKDOWN_STRICT.contains(Extension::ShortcutReferenceLinks));
        assert!(presets::MARKDOWN_GITHUB_READ.contains(Extension::ListsWithoutPrecedingBlankline));
        assert!(presets::MARKDOWN_PHPEXTRA_READ.contains(Extension::Abbreviations));
        // The double-backslash math delimiters are read but never written by the MMD dialect.
        assert!(presets::MARKDOWN_MMD_READ.contains(Extension::TexMathDoubleBackslash));
        assert!(!presets::MARKDOWN_MMD.contains(Extension::TexMathDoubleBackslash));
    }

    #[test]
    fn code_and_math_surface_variants_round_trip_and_seed_presets() {
        for token in ["fenced_code_blocks", "backtick_code_blocks", "tex_math_gfm"] {
            let ext = Extension::from_name(token).expect("a declared variant");
            assert_eq!(ext.name(), token);
        }
        // The Markdown dialect fences code with both backtick and tilde forms.
        assert!(presets::MARKDOWN.contains(Extension::FencedCodeBlocks));
        assert!(presets::MARKDOWN.contains(Extension::BacktickCodeBlocks));
        // GFM fences with backticks and renders math in its own surface; it has no tilde-fence form.
        assert!(presets::GFM.contains(Extension::BacktickCodeBlocks));
        assert!(presets::GFM.contains(Extension::TexMathGfm));
        assert!(!presets::GFM.contains(Extension::FencedCodeBlocks));
    }

    #[test]
    fn names_are_stable() {
        assert_eq!(Extension::Footnotes.name(), "footnotes");
        assert_eq!(Extension::Autolink.name(), "autolink_bare_uris");
        assert_eq!(Extension::HardLineBreaks.name(), "hard_line_breaks");
        assert_eq!(Extension::RawHtml.name(), "raw_html");
    }

    #[test]
    fn from_name_round_trips_every_variant() {
        // Also guards against two variants sharing an identifier: the second would not round-trip.
        for ext in Extension::ALL {
            assert_eq!(Extension::from_name(ext.name()), Some(*ext));
        }
        assert_eq!(Extension::from_name("not_an_extension"), None);
        assert_eq!(Extension::from_name(""), None);
    }

    #[test]
    fn union_combines_both_sides() {
        let a = Extensions::from_list(&[Extension::Strikeout]);
        let b = Extensions::from_list(&[Extension::Subscript]);
        let combined = a.union(b);
        assert!(combined.contains(Extension::Strikeout));
        assert!(combined.contains(Extension::Subscript));
        assert!(!combined.contains(Extension::Superscript));
        assert_eq!(a.union(Extensions::empty()), a);
        // Union is symmetric.
        assert_eq!(a.union(b), b.union(a));
    }
}
671}