// panache_parser/options.rs

1use std::collections::{HashMap, HashSet};
2use std::sync::Arc;
3
/// Markdown flavor to parse and format against.
///
/// Every flavor comes with its own set of extensions enabled by default.
/// With the `serde` feature, variant names are (de)serialized in kebab-case
/// unless a variant overrides its name below.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "kebab-case"))]
pub enum Flavor {
    /// Standard Pandoc Markdown with its default extensions. This is the
    /// default flavor.
    #[default]
    Pandoc,
    /// Quarto: Pandoc plus the Quarto-specific extensions.
    Quarto,
    /// R Markdown: Pandoc plus the R-specific extensions (serialized as
    /// `rmarkdown`).
    #[cfg_attr(feature = "serde", serde(rename = "rmarkdown"))]
    RMarkdown,
    /// GitHub Flavored Markdown.
    Gfm,
    /// CommonMark (serialized as `common-mark`; `commonmark` is accepted as
    /// an alias when deserializing).
    #[cfg_attr(feature = "serde", serde(alias = "commonmark"))]
    CommonMark,
    /// MultiMarkdown (serialized as `multimarkdown`).
    #[cfg_attr(feature = "serde", serde(rename = "multimarkdown"))]
    MultiMarkdown,
}
27
/// Pandoc/Markdown extensions configuration.
///
/// Each field represents a specific Pandoc extension (plus a few
/// Quarto/Bookdown/GFM additions) and is `true` when that syntax is enabled.
/// Extensions marked with a comment indicate implementation status.
///
/// With the `serde` feature, fields are (de)serialized in kebab-case; fields
/// carrying a snake_case `alias` also accept that spelling on input. Because
/// of `serde(default)`, keys missing from the input take their value from
/// `Extensions::default()`, independent of any flavor selected elsewhere.
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "serde", serde(default))]
#[cfg_attr(feature = "serde", serde(rename_all = "kebab-case"))]
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
pub struct Extensions {
    // ===== Block-level extensions =====

    // Headings
    /// Require blank line before headers (default: enabled)
    #[cfg_attr(feature = "serde", serde(alias = "blank_before_header"))]
    pub blank_before_header: bool,
    /// Full attribute syntax on headers {#id .class key=value}
    #[cfg_attr(feature = "serde", serde(alias = "header_attributes"))]
    pub header_attributes: bool,
    /// Auto-generate identifiers from headings
    pub auto_identifiers: bool,
    /// Use GitHub's algorithm for auto-generated heading identifiers
    pub gfm_auto_identifiers: bool,
    /// Implicit header references ([Heading] links to header)
    pub implicit_header_references: bool,

    // Block quotes
    /// Require blank line before blockquotes (default: enabled)
    #[cfg_attr(feature = "serde", serde(alias = "blank_before_blockquote"))]
    pub blank_before_blockquote: bool,

    // Lists
    /// Fancy list markers (roman numerals, letters, etc.)
    #[cfg_attr(feature = "serde", serde(alias = "fancy_lists"))]
    pub fancy_lists: bool,
    /// Start ordered lists at arbitrary numbers
    pub startnum: bool,
    /// Example lists with (@) markers
    #[cfg_attr(feature = "serde", serde(alias = "example_lists"))]
    pub example_lists: bool,
    /// GitHub-style task lists - [ ] and - [x]
    #[cfg_attr(feature = "serde", serde(alias = "task_lists"))]
    pub task_lists: bool,
    /// Term/definition syntax
    #[cfg_attr(feature = "serde", serde(alias = "definition_lists"))]
    pub definition_lists: bool,
    /// Allow lists without a preceding blank line
    #[cfg_attr(feature = "serde", serde(alias = "lists_without_preceding_blankline"))]
    pub lists_without_preceding_blankline: bool,

    // Code blocks
    /// Fenced code blocks with backticks
    #[cfg_attr(feature = "serde", serde(alias = "backtick_code_blocks"))]
    pub backtick_code_blocks: bool,
    /// Fenced code blocks with tildes
    #[cfg_attr(feature = "serde", serde(alias = "fenced_code_blocks"))]
    pub fenced_code_blocks: bool,
    /// Attributes on fenced code blocks {.language #id}
    #[cfg_attr(feature = "serde", serde(alias = "fenced_code_attributes"))]
    pub fenced_code_attributes: bool,
    /// Executable code syntax (currently fenced chunks like ```{r} / ```{python})
    pub executable_code: bool,
    /// R Markdown inline executable code (`...`r ...)
    pub rmarkdown_inline_code: bool,
    /// Quarto inline executable code (`...`{r} ...)
    pub quarto_inline_code: bool,
    /// Attributes on inline code
    #[cfg_attr(feature = "serde", serde(alias = "inline_code_attributes"))]
    pub inline_code_attributes: bool,

    // Tables
    /// Simple table syntax
    #[cfg_attr(feature = "serde", serde(alias = "simple_tables"))]
    pub simple_tables: bool,
    /// Multiline cell content in tables
    #[cfg_attr(feature = "serde", serde(alias = "multiline_tables"))]
    pub multiline_tables: bool,
    /// Grid-style tables
    #[cfg_attr(feature = "serde", serde(alias = "grid_tables"))]
    pub grid_tables: bool,
    /// Pipe tables (GitHub/PHP Markdown style)
    #[cfg_attr(feature = "serde", serde(alias = "pipe_tables"))]
    pub pipe_tables: bool,
    /// Table captions
    #[cfg_attr(feature = "serde", serde(alias = "table_captions"))]
    pub table_captions: bool,

    // Divs
    /// Fenced divs ::: {.class}
    #[cfg_attr(feature = "serde", serde(alias = "fenced_divs"))]
    pub fenced_divs: bool,
    /// HTML <div> elements
    #[cfg_attr(feature = "serde", serde(alias = "native_divs"))]
    pub native_divs: bool,

    // Other block elements
    /// Line blocks for poetry | prefix
    #[cfg_attr(feature = "serde", serde(alias = "line_blocks"))]
    pub line_blocks: bool,

    // ===== Inline elements =====

    // Emphasis
    /// Underscores don't trigger emphasis in snake_case
    #[cfg_attr(feature = "serde", serde(alias = "intraword_underscores"))]
    pub intraword_underscores: bool,
    /// Strikethrough ~~text~~
    pub strikeout: bool,
    /// Superscript ^super^
    pub superscript: bool,
    /// Subscript ~sub~
    pub subscript: bool,

    // Links
    /// Inline links [text](url)
    #[cfg_attr(feature = "serde", serde(alias = "inline_links"))]
    pub inline_links: bool,
    /// Reference links [text][ref]
    #[cfg_attr(feature = "serde", serde(alias = "reference_links"))]
    pub reference_links: bool,
    /// Shortcut reference links [ref] without second []
    #[cfg_attr(feature = "serde", serde(alias = "shortcut_reference_links"))]
    pub shortcut_reference_links: bool,
    /// Attributes on links [text](url){.class}
    #[cfg_attr(feature = "serde", serde(alias = "link_attributes"))]
    pub link_attributes: bool,
    /// Automatic links <http://example.com>
    pub autolinks: bool,

    // Images
    /// Inline images ![alt](url)
    #[cfg_attr(feature = "serde", serde(alias = "inline_images"))]
    pub inline_images: bool,
    /// Paragraph with just image becomes figure
    #[cfg_attr(feature = "serde", serde(alias = "implicit_figures"))]
    pub implicit_figures: bool,

    // Math
    /// Dollar-delimited math $x$ and $$equation$$
    #[cfg_attr(feature = "serde", serde(alias = "tex_math_dollars"))]
    pub tex_math_dollars: bool,
    /// [NON-DEFAULT] GFM math: inline $`...`$ and fenced ``` math blocks
    #[cfg_attr(feature = "serde", serde(alias = "tex_math_gfm"))]
    pub tex_math_gfm: bool,
    /// [NON-DEFAULT] Single backslash math \(...\) and \[...\] (RMarkdown default)
    #[cfg_attr(feature = "serde", serde(alias = "tex_math_single_backslash"))]
    pub tex_math_single_backslash: bool,
    /// [NON-DEFAULT] Double backslash math \\(...\\) and \\[...\\]
    #[cfg_attr(feature = "serde", serde(alias = "tex_math_double_backslash"))]
    pub tex_math_double_backslash: bool,

    // Footnotes
    /// Inline footnotes ^[text]
    #[cfg_attr(feature = "serde", serde(alias = "inline_footnotes"))]
    pub inline_footnotes: bool,
    /// Reference footnotes `[^1]` (requires footnote parsing)
    pub footnotes: bool,

    // Citations
    /// Citation syntax [@cite]
    pub citations: bool,

    // Spans
    /// Bracketed spans [text]{.class}
    #[cfg_attr(feature = "serde", serde(alias = "bracketed_spans"))]
    pub bracketed_spans: bool,
    /// HTML <span> elements
    #[cfg_attr(feature = "serde", serde(alias = "native_spans"))]
    pub native_spans: bool,

    // ===== Metadata =====
    /// YAML metadata block
    #[cfg_attr(feature = "serde", serde(alias = "yaml_metadata_block"))]
    pub yaml_metadata_block: bool,
    /// Pandoc title block (Title/Author/Date)
    #[cfg_attr(feature = "serde", serde(alias = "pandoc_title_block"))]
    pub pandoc_title_block: bool,
    /// [NON-DEFAULT] MultiMarkdown metadata/title block (Key: Value ...)
    pub mmd_title_block: bool,

    // ===== Raw content =====
    /// Raw HTML blocks and inline
    #[cfg_attr(feature = "serde", serde(alias = "raw_html"))]
    pub raw_html: bool,
    /// Markdown inside HTML blocks
    #[cfg_attr(feature = "serde", serde(alias = "markdown_in_html_blocks"))]
    pub markdown_in_html_blocks: bool,
    /// LaTeX commands and environments
    #[cfg_attr(feature = "serde", serde(alias = "raw_tex"))]
    pub raw_tex: bool,
    /// Generic raw blocks with {=format} syntax
    #[cfg_attr(feature = "serde", serde(alias = "raw_attribute"))]
    pub raw_attribute: bool,

    // ===== Escapes and special characters =====
    /// Backslash escapes any symbol
    #[cfg_attr(feature = "serde", serde(alias = "all_symbols_escapable"))]
    pub all_symbols_escapable: bool,
    /// Backslash at line end = hard line break
    #[cfg_attr(feature = "serde", serde(alias = "escaped_line_breaks"))]
    pub escaped_line_breaks: bool,

    // ===== NON-DEFAULT EXTENSIONS =====
    // These are disabled by default in Pandoc
    /// [NON-DEFAULT] Bare URLs become links
    #[cfg_attr(feature = "serde", serde(alias = "autolink_bare_uris"))]
    pub autolink_bare_uris: bool,
    /// [NON-DEFAULT] Newline = <br>
    #[cfg_attr(feature = "serde", serde(alias = "hard_line_breaks"))]
    pub hard_line_breaks: bool,
    /// [NON-DEFAULT] MultiMarkdown style heading identifiers [my-id]
    pub mmd_header_identifiers: bool,
    /// [NON-DEFAULT] MultiMarkdown key=value attributes on reference defs
    pub mmd_link_attributes: bool,
    /// [NON-DEFAULT] GitHub/CommonMark alerts in blockquotes (`> [!NOTE]`)
    pub alerts: bool,
    /// [NON-DEFAULT] :emoji: syntax
    pub emoji: bool,
    /// [NON-DEFAULT] Highlighted ==text==
    pub mark: bool,

    // ===== Quarto-specific extensions =====
    /// Quarto callout blocks (.callout-note, etc.)
    #[cfg_attr(feature = "serde", serde(alias = "quarto_callouts"))]
    pub quarto_callouts: bool,
    /// Quarto cross-references @fig-id, @tbl-id
    #[cfg_attr(feature = "serde", serde(alias = "quarto_crossrefs"))]
    pub quarto_crossrefs: bool,
    /// Quarto shortcodes {{< name args >}}
    #[cfg_attr(feature = "serde", serde(alias = "quarto_shortcodes"))]
    pub quarto_shortcodes: bool,
    /// Bookdown references \@ref(label) and (\#label)
    pub bookdown_references: bool,
    /// Bookdown equation references in LaTeX math blocks (\#eq:label)
    pub bookdown_equation_references: bool,
}
262
263impl Default for Extensions {
264    fn default() -> Self {
265        Self::for_flavor(Flavor::default())
266    }
267}
268
269impl Extensions {
270    fn none_defaults() -> Self {
271        Self {
272            alerts: false,
273            all_symbols_escapable: false,
274            auto_identifiers: false,
275            autolink_bare_uris: false,
276            autolinks: false,
277            backtick_code_blocks: false,
278            blank_before_blockquote: false,
279            blank_before_header: false,
280            bookdown_references: false,
281            bookdown_equation_references: false,
282            bracketed_spans: false,
283            citations: false,
284            definition_lists: false,
285            lists_without_preceding_blankline: false,
286            emoji: false,
287            escaped_line_breaks: false,
288            example_lists: false,
289            executable_code: false,
290            rmarkdown_inline_code: false,
291            quarto_inline_code: false,
292            fancy_lists: false,
293            fenced_code_attributes: false,
294            fenced_code_blocks: false,
295            fenced_divs: false,
296            footnotes: false,
297            gfm_auto_identifiers: false,
298            grid_tables: false,
299            hard_line_breaks: false,
300            header_attributes: false,
301            implicit_figures: false,
302            implicit_header_references: false,
303            inline_code_attributes: false,
304            inline_footnotes: false,
305            inline_images: false,
306            inline_links: false,
307            intraword_underscores: false,
308            line_blocks: false,
309            link_attributes: false,
310            mark: false,
311            markdown_in_html_blocks: false,
312            mmd_header_identifiers: false,
313            mmd_link_attributes: false,
314            mmd_title_block: false,
315            multiline_tables: false,
316            native_divs: false,
317            native_spans: false,
318            pandoc_title_block: false,
319            pipe_tables: false,
320            quarto_callouts: false,
321            quarto_crossrefs: false,
322            quarto_shortcodes: false,
323            raw_attribute: false,
324            raw_html: false,
325            raw_tex: false,
326            reference_links: false,
327            shortcut_reference_links: false,
328            simple_tables: false,
329            startnum: false,
330            strikeout: false,
331            subscript: false,
332            superscript: false,
333            table_captions: false,
334            task_lists: false,
335            tex_math_dollars: false,
336            tex_math_double_backslash: false,
337            tex_math_gfm: false,
338            tex_math_single_backslash: false,
339            yaml_metadata_block: false,
340        }
341    }
342
343    /// Get the default extension set for a given flavor.
344    pub fn for_flavor(flavor: Flavor) -> Self {
345        match flavor {
346            Flavor::Pandoc => Self::pandoc_defaults(),
347            Flavor::Quarto => Self::quarto_defaults(),
348            Flavor::RMarkdown => Self::rmarkdown_defaults(),
349            Flavor::Gfm => Self::gfm_defaults(),
350            Flavor::CommonMark => Self::commonmark_defaults(),
351            Flavor::MultiMarkdown => Self::multimarkdown_defaults(),
352        }
353    }
354
355    fn pandoc_defaults() -> Self {
356        Self {
357            // Block-level - enabled by default in Pandoc
358            auto_identifiers: true,
359            blank_before_blockquote: true,
360            blank_before_header: true,
361            gfm_auto_identifiers: false,
362            header_attributes: true,
363            implicit_header_references: true,
364
365            // Lists
366            definition_lists: true,
367            example_lists: true,
368            fancy_lists: true,
369            lists_without_preceding_blankline: false,
370            startnum: true,
371            task_lists: true,
372
373            // Code
374            backtick_code_blocks: true,
375            executable_code: false,
376            rmarkdown_inline_code: false,
377            quarto_inline_code: false,
378            fenced_code_attributes: true,
379            fenced_code_blocks: true,
380            inline_code_attributes: true,
381
382            // Tables
383            grid_tables: true,
384            multiline_tables: true,
385            pipe_tables: true,
386            simple_tables: true,
387            table_captions: true,
388
389            // Divs
390            fenced_divs: true,
391            native_divs: true,
392
393            // Other blocks
394            line_blocks: true,
395
396            // Inline
397            intraword_underscores: true,
398            strikeout: true,
399            subscript: true,
400            superscript: true,
401
402            // Links
403            autolinks: true,
404            inline_links: true,
405            link_attributes: true,
406            reference_links: true,
407            shortcut_reference_links: true,
408
409            // Images
410            implicit_figures: true,
411            inline_images: true,
412
413            // Math
414            tex_math_dollars: true,
415            tex_math_double_backslash: false,
416            tex_math_gfm: false,
417            tex_math_single_backslash: false,
418
419            // Footnotes
420            footnotes: true,
421            inline_footnotes: true,
422
423            // Citations
424            citations: true,
425
426            // Spans
427            bracketed_spans: true,
428            native_spans: true,
429
430            // Metadata
431            mmd_title_block: false,
432            pandoc_title_block: true,
433            yaml_metadata_block: true,
434
435            // Raw
436            markdown_in_html_blocks: false,
437            raw_attribute: true,
438            raw_html: true,
439            raw_tex: true,
440
441            // Escapes
442            all_symbols_escapable: true,
443            escaped_line_breaks: true,
444
445            // Non-default
446            alerts: false,
447            autolink_bare_uris: false,
448            emoji: false,
449            hard_line_breaks: false,
450            mark: false,
451            mmd_header_identifiers: false,
452            mmd_link_attributes: false,
453
454            // Quarto/Bookdown-specific
455            bookdown_references: false,
456            bookdown_equation_references: false,
457            quarto_callouts: false,
458            quarto_crossrefs: false,
459            quarto_shortcodes: false,
460        }
461    }
462
463    fn quarto_defaults() -> Self {
464        let mut ext = Self::pandoc_defaults();
465
466        ext.executable_code = true;
467        ext.rmarkdown_inline_code = true;
468        ext.quarto_inline_code = true;
469        ext.quarto_callouts = true;
470        ext.quarto_crossrefs = true;
471        ext.quarto_shortcodes = true;
472
473        ext
474    }
475
476    fn rmarkdown_defaults() -> Self {
477        let mut ext = Self::pandoc_defaults();
478
479        ext.bookdown_references = true;
480        ext.bookdown_equation_references = true;
481        ext.executable_code = true;
482        ext.rmarkdown_inline_code = true;
483        ext.quarto_inline_code = false;
484        ext.tex_math_dollars = true;
485        ext.tex_math_single_backslash = true;
486
487        ext
488    }
489
490    fn gfm_defaults() -> Self {
491        let mut ext = Self::none_defaults();
492
493        ext.alerts = true;
494        ext.auto_identifiers = true;
495        ext.autolink_bare_uris = true;
496        ext.autolinks = true;
497        ext.backtick_code_blocks = true;
498        ext.emoji = true;
499        ext.fenced_code_blocks = true;
500        ext.footnotes = true;
501        ext.gfm_auto_identifiers = true;
502        ext.inline_links = true;
503        ext.pipe_tables = true;
504        ext.raw_html = true;
505        ext.strikeout = true;
506        ext.task_lists = true;
507        ext.tex_math_dollars = true;
508        ext.tex_math_gfm = true;
509        ext.yaml_metadata_block = true;
510
511        ext
512    }
513
514    fn commonmark_defaults() -> Self {
515        let mut ext = Self::none_defaults();
516        // CommonMark's core grammar is what pandoc's commonmark reader treats
517        // as "not extensions" — they're built into the reader. Panache's
518        // parser still gates each construct on its extension flag, so we have
519        // to enable the CommonMark-mandatory ones explicitly here.
520        //
521        // Notably absent: `all_symbols_escapable`. CommonMark only allows
522        // backslash escapes of ASCII punctuation, and panache's
523        // `all_symbols_escapable` flag widens that to any character — so it
524        // must stay off for CommonMark.
525        ext.autolinks = true;
526        ext.backtick_code_blocks = true;
527        ext.escaped_line_breaks = true;
528        ext.fenced_code_blocks = true;
529        ext.inline_images = true;
530        ext.inline_links = true;
531        ext.intraword_underscores = true;
532        ext.raw_html = true;
533        ext.reference_links = true;
534        ext.shortcut_reference_links = true;
535        ext
536    }
537
538    fn multimarkdown_defaults() -> Self {
539        let mut ext = Self::none_defaults();
540
541        ext.all_symbols_escapable = true;
542        ext.auto_identifiers = true;
543        ext.backtick_code_blocks = true;
544        ext.definition_lists = true;
545        ext.footnotes = true;
546        ext.implicit_figures = true;
547        ext.implicit_header_references = true;
548        ext.intraword_underscores = true;
549        ext.mmd_header_identifiers = true;
550        ext.mmd_link_attributes = true;
551        ext.mmd_title_block = true;
552        ext.pipe_tables = true;
553        ext.raw_attribute = true;
554        ext.raw_html = true;
555        ext.reference_links = true;
556        ext.shortcut_reference_links = true;
557        ext.subscript = true;
558        ext.superscript = true;
559        ext.tex_math_dollars = true;
560        ext.tex_math_double_backslash = true;
561
562        ext
563    }
564
565    /// Merge user-specified extension overrides with flavor defaults.
566    ///
567    /// This is used to support partial extension overrides in config files.
568    /// For example, if a user specifies `flavor = "quarto"` and then sets
569    /// `[extensions] quarto-crossrefs = false`, we want all other extensions
570    /// to use Quarto defaults, not Pandoc defaults.
571    ///
572    /// # Arguments
573    /// * `user_overrides` - Map of extension names to their user-specified values
574    /// * `flavor` - The flavor to use for default values
575    ///
576    /// # Returns
577    /// A new Extensions struct with flavor defaults merged with user overrides
578    pub fn merge_with_flavor(user_overrides: HashMap<String, bool>, flavor: Flavor) -> Self {
579        let defaults = Self::for_flavor(flavor);
580        Self::merge_overrides(defaults, user_overrides)
581    }
582
583    fn merge_overrides(mut base: Extensions, user_overrides: HashMap<String, bool>) -> Self {
584        for (key, value) in user_overrides {
585            let normalized_key = key.replace('_', "-");
586            match normalized_key.as_str() {
587                "blank-before-header" => base.blank_before_header = value,
588                "header-attributes" => base.header_attributes = value,
589                "auto-identifiers" => base.auto_identifiers = value,
590                "gfm-auto-identifiers" => base.gfm_auto_identifiers = value,
591                "implicit-header-references" => base.implicit_header_references = value,
592                "blank-before-blockquote" => base.blank_before_blockquote = value,
593                "fancy-lists" => base.fancy_lists = value,
594                "startnum" => base.startnum = value,
595                "example-lists" => base.example_lists = value,
596                "task-lists" => base.task_lists = value,
597                "definition-lists" => base.definition_lists = value,
598                "lists-without-preceding-blankline" => {
599                    base.lists_without_preceding_blankline = value
600                }
601                "backtick-code-blocks" => base.backtick_code_blocks = value,
602                "fenced-code-blocks" => base.fenced_code_blocks = value,
603                "fenced-code-attributes" => base.fenced_code_attributes = value,
604                "executable-code" => base.executable_code = value,
605                "rmarkdown-inline-code" => base.rmarkdown_inline_code = value,
606                "quarto-inline-code" => base.quarto_inline_code = value,
607                "inline-code-attributes" => base.inline_code_attributes = value,
608                "simple-tables" => base.simple_tables = value,
609                "multiline-tables" => base.multiline_tables = value,
610                "grid-tables" => base.grid_tables = value,
611                "pipe-tables" => base.pipe_tables = value,
612                "table-captions" => base.table_captions = value,
613                "fenced-divs" => base.fenced_divs = value,
614                "native-divs" => base.native_divs = value,
615                "line-blocks" => base.line_blocks = value,
616                "intraword-underscores" => base.intraword_underscores = value,
617                "strikeout" => base.strikeout = value,
618                "superscript" => base.superscript = value,
619                "subscript" => base.subscript = value,
620                "inline-links" => base.inline_links = value,
621                "reference-links" => base.reference_links = value,
622                "shortcut-reference-links" => base.shortcut_reference_links = value,
623                "link-attributes" => base.link_attributes = value,
624                "autolinks" => base.autolinks = value,
625                "inline-images" => base.inline_images = value,
626                "implicit-figures" => base.implicit_figures = value,
627                "tex-math-dollars" => base.tex_math_dollars = value,
628                "tex-math-gfm" => base.tex_math_gfm = value,
629                "tex-math-single-backslash" => base.tex_math_single_backslash = value,
630                "tex-math-double-backslash" => base.tex_math_double_backslash = value,
631                "inline-footnotes" => base.inline_footnotes = value,
632                "footnotes" => base.footnotes = value,
633                "citations" => base.citations = value,
634                "bracketed-spans" => base.bracketed_spans = value,
635                "native-spans" => base.native_spans = value,
636                "yaml-metadata-block" => base.yaml_metadata_block = value,
637                "pandoc-title-block" => base.pandoc_title_block = value,
638                "mmd-title-block" => base.mmd_title_block = value,
639                "raw-html" => base.raw_html = value,
640                "markdown-in-html-blocks" => base.markdown_in_html_blocks = value,
641                "raw-tex" => base.raw_tex = value,
642                "raw-attribute" => base.raw_attribute = value,
643                "all-symbols-escapable" => base.all_symbols_escapable = value,
644                "escaped-line-breaks" => base.escaped_line_breaks = value,
645                "autolink-bare-uris" => base.autolink_bare_uris = value,
646                "hard-line-breaks" => base.hard_line_breaks = value,
647                "mmd-header-identifiers" => base.mmd_header_identifiers = value,
648                "mmd-link-attributes" => base.mmd_link_attributes = value,
649                "alerts" => base.alerts = value,
650                "emoji" => base.emoji = value,
651                "mark" => base.mark = value,
652                "quarto-callouts" => base.quarto_callouts = value,
653                "quarto-crossrefs" => base.quarto_crossrefs = value,
654                "quarto-shortcodes" => base.quarto_shortcodes = value,
655                "bookdown-references" => base.bookdown_references = value,
656                "bookdown-equation-references" => base.bookdown_equation_references = value,
657                _ => {}
658            }
659        }
660        base
661    }
662}
663
#[cfg(test)]
mod tests {
    use super::{Extensions, Flavor};
    use std::collections::HashMap;

    /// A known kebab-case key overrides the flavor default.
    #[test]
    fn merge_with_flavor_keeps_known_extension_overrides() {
        let mut overrides = HashMap::new();
        overrides.insert("intraword-underscores".to_string(), false);
        let ext = Extensions::merge_with_flavor(overrides, Flavor::Pandoc);
        assert!(!ext.intraword_underscores);
    }

    /// Unknown keys must leave the whole extension set untouched, not just a
    /// single sampled field.
    #[test]
    fn merge_with_flavor_ignores_unknown_extension_overrides() {
        let mut overrides = HashMap::new();
        overrides.insert("smart".to_string(), true);
        overrides.insert("smart-quotes".to_string(), true);
        let ext = Extensions::merge_with_flavor(overrides, Flavor::Gfm);
        assert!(ext.strikeout, "known defaults should remain intact");
        assert_eq!(
            ext,
            Extensions::for_flavor(Flavor::Gfm),
            "unknown keys must not change any extension"
        );
    }

    #[test]
    fn lists_without_preceding_blankline_defaults_false_for_pandoc_and_gfm() {
        assert!(!Extensions::for_flavor(Flavor::Pandoc).lists_without_preceding_blankline);
        assert!(!Extensions::for_flavor(Flavor::Gfm).lists_without_preceding_blankline);
    }

    #[test]
    fn merge_with_flavor_accepts_lists_without_preceding_blankline_override() {
        let mut overrides = HashMap::new();
        overrides.insert("lists-without-preceding-blankline".to_string(), true);
        let ext = Extensions::merge_with_flavor(overrides, Flavor::Pandoc);
        assert!(ext.lists_without_preceding_blankline);
    }

    /// snake_case keys are normalized to kebab-case, and only the named
    /// extension changes relative to the flavor defaults.
    #[test]
    fn merge_with_flavor_normalizes_snake_case_keys() {
        let mut overrides = HashMap::new();
        overrides.insert("quarto_crossrefs".to_string(), false);
        let ext = Extensions::merge_with_flavor(overrides, Flavor::Quarto);

        let expected = Extensions {
            quarto_crossrefs: false,
            ..Extensions::for_flavor(Flavor::Quarto)
        };
        assert_eq!(ext, expected);
    }
}
700
/// Pandoc version whose behavior is matched for ambiguous syntax edge cases.
///
/// The default is [`PandocCompat::V3_9`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum PandocCompat {
    /// The newest Pandoc-compat behavior that Panache supports, as a pinned
    /// alias.
    ///
    /// Deliberately not a floating "whatever upstream released last": it
    /// resolves to one concrete, verified version and is bumped by hand.
    #[cfg_attr(feature = "serde", serde(rename = "latest"))]
    Latest,
    /// Behave like Pandoc 3.7 in ambiguous syntax edge cases.
    #[cfg_attr(
        feature = "serde",
        serde(rename = "3.7", alias = "3-7", alias = "v3.7", alias = "v3-7")
    )]
    V3_7,
    /// Behave like Pandoc 3.9 in ambiguous syntax edge cases.
    #[default]
    #[cfg_attr(
        feature = "serde",
        serde(rename = "3.9", alias = "3-9", alias = "v3.9", alias = "v3-9")
    )]
    V3_9,
}
724
725impl PandocCompat {
726    /// Pinned target for `latest`.
727    pub const PINNED_LATEST: Self = Self::V3_9;
728
729    pub fn effective(self) -> Self {
730        match self {
731            Self::Latest => Self::PINNED_LATEST,
732            other => other,
733        }
734    }
735}
736
/// Parser dialect: the inline tokenization rule set the parser runs on.
///
/// This is not the same thing as [`Flavor`]. A `Flavor` is the identity the
/// user picks (Pandoc, Quarto, GFM, ...) and decides the extension defaults,
/// whereas a `Dialect` is the structural identity of the parser. Flavors can
/// share one dialect: Quarto and RMarkdown run on `Pandoc`, while CommonMark
/// and GFM run on `CommonMark`.
///
/// Branch on the dialect where behavior differs fundamentally between the
/// two families (for instance, how an unmatched backtick run is handled).
/// Feature toggles that vary per flavor still belong on [`Extensions`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "kebab-case"))]
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
pub enum Dialect {
    /// The Pandoc-markdown family; the default dialect of the Pandoc, Quarto,
    /// RMarkdown and MultiMarkdown flavors.
    #[default]
    Pandoc,
    /// The CommonMark family; the default dialect of the CommonMark and GFM
    /// flavors.
    CommonMark,
}
759
760impl Dialect {
761    /// Default dialect for a given user-facing flavor.
762    pub fn for_flavor(flavor: Flavor) -> Self {
763        match flavor {
764            Flavor::CommonMark | Flavor::Gfm => Dialect::CommonMark,
765            Flavor::Pandoc | Flavor::Quarto | Flavor::RMarkdown | Flavor::MultiMarkdown => {
766                Dialect::Pandoc
767            }
768        }
769    }
770}
771
772#[derive(Debug, Clone)]
773#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
774#[cfg_attr(feature = "serde", serde(default, rename_all = "kebab-case"))]
775#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
776pub struct ParserOptions {
777    pub flavor: Flavor,
778    pub dialect: Dialect,
779    pub extensions: Extensions,
780    /// Compatibility target for ambiguous Pandoc behavior.
781    pub pandoc_compat: PandocCompat,
782    /// Document-level reference link label set, populated by the
783    /// top-level `parse()` function when running CommonMark dialect and
784    /// consulted by inline parsing's bracket resolution pass. `None`
785    /// means "not pre-computed"; the inline pipeline then treats every
786    /// reference-shaped bracket pair conservatively (current behavior),
787    /// which is correct for the Pandoc dialect and a graceful
788    /// degradation for embedded use cases that bypass `parse()`.
789    ///
790    /// Skipped by serde so config files don't try to (de)serialize a
791    /// runtime cache.
792    #[cfg_attr(feature = "serde", serde(skip))]
793    pub refdef_labels: Option<Arc<HashSet<String>>>,
794}
795
796impl Default for ParserOptions {
797    fn default() -> Self {
798        let flavor = Flavor::default();
799        Self {
800            flavor,
801            dialect: Dialect::for_flavor(flavor),
802            extensions: Extensions::for_flavor(flavor),
803            pandoc_compat: PandocCompat::default(),
804            refdef_labels: None,
805        }
806    }
807}
808
809impl ParserOptions {
810    pub fn effective_pandoc_compat(&self) -> PandocCompat {
811        self.pandoc_compat.effective()
812    }
813}
814
#[cfg(feature = "schema")]
impl schemars::JsonSchema for Flavor {
    /// Name under which this type appears in generated schemas.
    fn schema_name() -> std::borrow::Cow<'static, str> {
        std::borrow::Cow::Borrowed("Flavor")
    }

    /// Hand-written string-enum schema.
    ///
    /// The serde aliases are listed as well, so the schema admits every
    /// spelling the parser admits (for example `commonmark` next to the
    /// canonical kebab-case `common-mark`).
    fn json_schema(_generator: &mut schemars::SchemaGenerator) -> schemars::Schema {
        schemars::json_schema!({
            "type": "string",
            "description": "Markdown flavor to parse and format against.",
            "enum": [
                "pandoc", "quarto", "rmarkdown", "gfm",
                "common-mark", "commonmark",
                "multimarkdown"
            ]
        })
    }
}
840
#[cfg(feature = "schema")]
impl schemars::JsonSchema for PandocCompat {
    /// Name under which this type appears in generated schemas.
    fn schema_name() -> std::borrow::Cow<'static, str> {
        std::borrow::Cow::Borrowed("PandocCompat")
    }

    /// Hand-written string-enum schema listing the canonical name of every
    /// variant together with its serde aliases.
    fn json_schema(_generator: &mut schemars::SchemaGenerator) -> schemars::Schema {
        schemars::json_schema!({
            "type": "string",
            "description": "Compatibility target for ambiguous Pandoc behavior.",
            "enum": [
                "latest",
                "3.7",
                "3-7",
                "v3.7",
                "v3-7",
                "3.9",
                "3-9",
                "v3.9",
                "v3-9"
            ]
        })
    }
}