Skip to main content

panache_parser/
options.rs

1use std::collections::{HashMap, HashSet};
2use std::sync::Arc;
3
/// The flavor of Markdown to parse and format.
///
/// Each flavor has a different set of default extensions enabled; see
/// [`Extensions::for_flavor`] for the per-flavor sets. [`Flavor::Pandoc`] is
/// the default flavor.
///
/// With the `serde` feature, variant names are kebab-cased unless a variant
/// carries its own `rename` below.
// NOTE(review): `ParserOptions` derives `schemars::JsonSchema` under the
// `schema` feature and has a `flavor: Flavor` field, but no `JsonSchema`
// derive is visible on this enum — confirm an impl is provided elsewhere.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "kebab-case"))]
pub enum Flavor {
    /// Standard Pandoc Markdown (default extensions enabled)
    #[default]
    Pandoc,
    /// Quarto (Pandoc + Quarto-specific extensions)
    Quarto,
    /// R Markdown (Pandoc + R-specific extensions)
    // Explicit rename: kebab-casing `RMarkdown` would yield `r-markdown`.
    #[cfg_attr(feature = "serde", serde(rename = "rmarkdown"))]
    RMarkdown,
    /// GitHub Flavored Markdown
    Gfm,
    /// CommonMark
    // `alias` only widens what is accepted on input: `commonmark` is read in
    // addition to the kebab-cased `common-mark`, which is still what is written.
    #[cfg_attr(feature = "serde", serde(alias = "commonmark"))]
    CommonMark,
    /// MultiMarkdown
    // Explicit rename: kebab-casing `MultiMarkdown` would yield `multi-markdown`.
    #[cfg_attr(feature = "serde", serde(rename = "multimarkdown"))]
    MultiMarkdown,
}
27
/// Pandoc/Markdown extensions configuration.
///
/// Each field represents a specific Pandoc extension; `true` means the
/// extension is enabled. Extensions marked with a comment indicate
/// implementation status, and `[NON-DEFAULT]` marks extensions that are off in
/// the Pandoc flavor defaults.
///
/// With the `serde` feature, fields are (de)serialized under their kebab-case
/// names. Fields carrying an `alias` additionally accept their snake_case name
/// on input. Because of the container-level `serde(default)`, fields missing
/// from the input are taken from [`Extensions::default`], i.e. the defaults of
/// [`Flavor::default`].
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "serde", serde(default))]
#[cfg_attr(feature = "serde", serde(rename_all = "kebab-case"))]
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
pub struct Extensions {
    // ===== Block-level extensions =====

    // Headings
    /// Require blank line before headers (default: enabled)
    #[cfg_attr(feature = "serde", serde(alias = "blank_before_header"))]
    pub blank_before_header: bool,
    /// Full attribute syntax on headers {#id .class key=value}
    #[cfg_attr(feature = "serde", serde(alias = "header_attributes"))]
    pub header_attributes: bool,
    /// Auto-generate identifiers from headings
    pub auto_identifiers: bool,
    /// Use GitHub's algorithm for auto-generated heading identifiers
    pub gfm_auto_identifiers: bool,
    /// Implicit header references ([Heading] links to header)
    pub implicit_header_references: bool,

    // Block quotes
    /// Require blank line before blockquotes (default: enabled)
    #[cfg_attr(feature = "serde", serde(alias = "blank_before_blockquote"))]
    pub blank_before_blockquote: bool,

    // Lists
    /// Fancy list markers (roman numerals, letters, etc.)
    #[cfg_attr(feature = "serde", serde(alias = "fancy_lists"))]
    pub fancy_lists: bool,
    /// Start ordered lists at arbitrary numbers
    pub startnum: bool,
    /// Example lists with (@) markers
    #[cfg_attr(feature = "serde", serde(alias = "example_lists"))]
    pub example_lists: bool,
    /// GitHub-style task lists - [ ] and - [x]
    #[cfg_attr(feature = "serde", serde(alias = "task_lists"))]
    pub task_lists: bool,
    /// Term/definition syntax
    #[cfg_attr(feature = "serde", serde(alias = "definition_lists"))]
    pub definition_lists: bool,
    /// Allow lists without a preceding blank line
    #[cfg_attr(feature = "serde", serde(alias = "lists_without_preceding_blankline"))]
    pub lists_without_preceding_blankline: bool,
    /// [NON-DEFAULT] Pandoc <= 2.0 list semantics: continuation paragraphs and
    /// nested lists require four-space (one tab-width) indentation
    #[cfg_attr(feature = "serde", serde(alias = "four_space_rule"))]
    pub four_space_rule: bool,

    // Code blocks
    /// Fenced code blocks with backticks
    #[cfg_attr(feature = "serde", serde(alias = "backtick_code_blocks"))]
    pub backtick_code_blocks: bool,
    /// Fenced code blocks with tildes
    #[cfg_attr(feature = "serde", serde(alias = "fenced_code_blocks"))]
    pub fenced_code_blocks: bool,
    /// Attributes on fenced code blocks {.language #id}
    #[cfg_attr(feature = "serde", serde(alias = "fenced_code_attributes"))]
    pub fenced_code_attributes: bool,
    /// Executable code syntax (currently fenced chunks like ```{r} / ```{python})
    pub executable_code: bool,
    /// R Markdown inline executable code (`...`r ...)
    pub rmarkdown_inline_code: bool,
    /// Quarto inline executable code (`...`{r} ...)
    pub quarto_inline_code: bool,
    /// Attributes on inline code
    #[cfg_attr(feature = "serde", serde(alias = "inline_code_attributes"))]
    pub inline_code_attributes: bool,

    // Tables
    /// Simple table syntax
    #[cfg_attr(feature = "serde", serde(alias = "simple_tables"))]
    pub simple_tables: bool,
    /// Multiline cell content in tables
    #[cfg_attr(feature = "serde", serde(alias = "multiline_tables"))]
    pub multiline_tables: bool,
    /// Grid-style tables
    #[cfg_attr(feature = "serde", serde(alias = "grid_tables"))]
    pub grid_tables: bool,
    /// Pipe tables (GitHub/PHP Markdown style)
    #[cfg_attr(feature = "serde", serde(alias = "pipe_tables"))]
    pub pipe_tables: bool,
    /// Table captions
    #[cfg_attr(feature = "serde", serde(alias = "table_captions"))]
    pub table_captions: bool,

    // Divs
    /// Fenced divs ::: {.class}
    #[cfg_attr(feature = "serde", serde(alias = "fenced_divs"))]
    pub fenced_divs: bool,
    /// HTML <div> elements
    #[cfg_attr(feature = "serde", serde(alias = "native_divs"))]
    pub native_divs: bool,

    // Other block elements
    /// Line blocks for poetry | prefix
    #[cfg_attr(feature = "serde", serde(alias = "line_blocks"))]
    pub line_blocks: bool,

    // ===== Inline elements =====

    // Emphasis
    /// Underscores don't trigger emphasis in snake_case
    #[cfg_attr(feature = "serde", serde(alias = "intraword_underscores"))]
    pub intraword_underscores: bool,
    /// Strikethrough ~~text~~
    pub strikeout: bool,
    /// Superscript ^super^
    pub superscript: bool,
    /// Subscript ~sub~
    pub subscript: bool,

    // Links
    /// Inline links [text](url)
    #[cfg_attr(feature = "serde", serde(alias = "inline_links"))]
    pub inline_links: bool,
    /// Reference links [text][ref]
    #[cfg_attr(feature = "serde", serde(alias = "reference_links"))]
    pub reference_links: bool,
    /// Shortcut reference links [ref] without second []
    #[cfg_attr(feature = "serde", serde(alias = "shortcut_reference_links"))]
    pub shortcut_reference_links: bool,
    /// Attributes on links [text](url){.class}
    #[cfg_attr(feature = "serde", serde(alias = "link_attributes"))]
    pub link_attributes: bool,
    /// Automatic links <http://example.com>
    pub autolinks: bool,

    // Images
    /// Inline images ![alt](url)
    #[cfg_attr(feature = "serde", serde(alias = "inline_images"))]
    pub inline_images: bool,
    /// Paragraph with just image becomes figure
    #[cfg_attr(feature = "serde", serde(alias = "implicit_figures"))]
    pub implicit_figures: bool,

    // Math
    /// Dollar-delimited math $x$ and $$equation$$
    #[cfg_attr(feature = "serde", serde(alias = "tex_math_dollars"))]
    pub tex_math_dollars: bool,
    /// [NON-DEFAULT] GFM math: inline $`...`$ and fenced ``` math blocks
    #[cfg_attr(feature = "serde", serde(alias = "tex_math_gfm"))]
    pub tex_math_gfm: bool,
    /// [NON-DEFAULT] Single backslash math \(...\) and \[...\] (RMarkdown default)
    #[cfg_attr(feature = "serde", serde(alias = "tex_math_single_backslash"))]
    pub tex_math_single_backslash: bool,
    /// [NON-DEFAULT] Double backslash math \\(...\\) and \\[...\\]
    #[cfg_attr(feature = "serde", serde(alias = "tex_math_double_backslash"))]
    pub tex_math_double_backslash: bool,

    // Footnotes
    /// Inline footnotes ^[text]
    #[cfg_attr(feature = "serde", serde(alias = "inline_footnotes"))]
    pub inline_footnotes: bool,
    /// Reference footnotes `[^1]` (requires footnote parsing)
    pub footnotes: bool,

    // Citations
    /// Citation syntax [@cite]
    pub citations: bool,

    // Spans
    /// Bracketed spans [text]{.class}
    #[cfg_attr(feature = "serde", serde(alias = "bracketed_spans"))]
    pub bracketed_spans: bool,
    /// HTML <span> elements
    #[cfg_attr(feature = "serde", serde(alias = "native_spans"))]
    pub native_spans: bool,

    // ===== Metadata =====
    /// YAML metadata block
    #[cfg_attr(feature = "serde", serde(alias = "yaml_metadata_block"))]
    pub yaml_metadata_block: bool,
    /// Pandoc title block (Title/Author/Date)
    #[cfg_attr(feature = "serde", serde(alias = "pandoc_title_block"))]
    pub pandoc_title_block: bool,
    /// [NON-DEFAULT] MultiMarkdown metadata/title block (Key: Value ...)
    pub mmd_title_block: bool,

    // ===== Raw content =====
    /// Raw HTML blocks and inline
    #[cfg_attr(feature = "serde", serde(alias = "raw_html"))]
    pub raw_html: bool,
    /// Markdown inside HTML blocks
    #[cfg_attr(feature = "serde", serde(alias = "markdown_in_html_blocks"))]
    pub markdown_in_html_blocks: bool,
    /// LaTeX commands and environments
    #[cfg_attr(feature = "serde", serde(alias = "raw_tex"))]
    pub raw_tex: bool,
    /// Generic raw blocks with {=format} syntax
    #[cfg_attr(feature = "serde", serde(alias = "raw_attribute"))]
    pub raw_attribute: bool,

    // ===== Escapes and special characters =====
    /// Backslash escapes any symbol
    #[cfg_attr(feature = "serde", serde(alias = "all_symbols_escapable"))]
    pub all_symbols_escapable: bool,
    /// Backslash at line end = hard line break
    #[cfg_attr(feature = "serde", serde(alias = "escaped_line_breaks"))]
    pub escaped_line_breaks: bool,

    // ===== NON-DEFAULT EXTENSIONS =====
    // These are disabled by default in Pandoc
    /// [NON-DEFAULT] Bare URLs become links
    #[cfg_attr(feature = "serde", serde(alias = "autolink_bare_uris"))]
    pub autolink_bare_uris: bool,
    /// [NON-DEFAULT] Newline = <br>
    #[cfg_attr(feature = "serde", serde(alias = "hard_line_breaks"))]
    pub hard_line_breaks: bool,
    /// [NON-DEFAULT] MultiMarkdown style heading identifiers [my-id]
    pub mmd_header_identifiers: bool,
    /// [NON-DEFAULT] MultiMarkdown key=value attributes on reference defs
    pub mmd_link_attributes: bool,
    /// [NON-DEFAULT] GitHub/CommonMark alerts in blockquotes (`> [!NOTE]`)
    pub alerts: bool,
    /// [NON-DEFAULT] :emoji: syntax
    pub emoji: bool,
    /// [NON-DEFAULT] Highlighted ==text==
    pub mark: bool,

    // ===== Quarto-specific extensions =====
    /// Quarto callout blocks (.callout-note, etc.)
    #[cfg_attr(feature = "serde", serde(alias = "quarto_callouts"))]
    pub quarto_callouts: bool,
    /// Quarto cross-references @fig-id, @tbl-id
    #[cfg_attr(feature = "serde", serde(alias = "quarto_crossrefs"))]
    pub quarto_crossrefs: bool,
    /// Quarto shortcodes {{< name args >}}
    #[cfg_attr(feature = "serde", serde(alias = "quarto_shortcodes"))]
    pub quarto_shortcodes: bool,
    /// Bookdown references \@ref(label) and (\#label)
    pub bookdown_references: bool,
    /// Bookdown equation references in LaTeX math blocks (\#eq:label)
    pub bookdown_equation_references: bool,
}
266
267impl Default for Extensions {
268    fn default() -> Self {
269        Self::for_flavor(Flavor::default())
270    }
271}
272
273impl Extensions {
274    fn none_defaults() -> Self {
275        Self {
276            alerts: false,
277            all_symbols_escapable: false,
278            auto_identifiers: false,
279            autolink_bare_uris: false,
280            autolinks: false,
281            backtick_code_blocks: false,
282            blank_before_blockquote: false,
283            blank_before_header: false,
284            bookdown_references: false,
285            bookdown_equation_references: false,
286            bracketed_spans: false,
287            citations: false,
288            definition_lists: false,
289            lists_without_preceding_blankline: false,
290            emoji: false,
291            escaped_line_breaks: false,
292            example_lists: false,
293            executable_code: false,
294            rmarkdown_inline_code: false,
295            quarto_inline_code: false,
296            fancy_lists: false,
297            fenced_code_attributes: false,
298            fenced_code_blocks: false,
299            fenced_divs: false,
300            footnotes: false,
301            four_space_rule: false,
302            gfm_auto_identifiers: false,
303            grid_tables: false,
304            hard_line_breaks: false,
305            header_attributes: false,
306            implicit_figures: false,
307            implicit_header_references: false,
308            inline_code_attributes: false,
309            inline_footnotes: false,
310            inline_images: false,
311            inline_links: false,
312            intraword_underscores: false,
313            line_blocks: false,
314            link_attributes: false,
315            mark: false,
316            markdown_in_html_blocks: false,
317            mmd_header_identifiers: false,
318            mmd_link_attributes: false,
319            mmd_title_block: false,
320            multiline_tables: false,
321            native_divs: false,
322            native_spans: false,
323            pandoc_title_block: false,
324            pipe_tables: false,
325            quarto_callouts: false,
326            quarto_crossrefs: false,
327            quarto_shortcodes: false,
328            raw_attribute: false,
329            raw_html: false,
330            raw_tex: false,
331            reference_links: false,
332            shortcut_reference_links: false,
333            simple_tables: false,
334            startnum: false,
335            strikeout: false,
336            subscript: false,
337            superscript: false,
338            table_captions: false,
339            task_lists: false,
340            tex_math_dollars: false,
341            tex_math_double_backslash: false,
342            tex_math_gfm: false,
343            tex_math_single_backslash: false,
344            yaml_metadata_block: false,
345        }
346    }
347
348    /// Get the default extension set for a given flavor.
349    pub fn for_flavor(flavor: Flavor) -> Self {
350        match flavor {
351            Flavor::Pandoc => Self::pandoc_defaults(),
352            Flavor::Quarto => Self::quarto_defaults(),
353            Flavor::RMarkdown => Self::rmarkdown_defaults(),
354            Flavor::Gfm => Self::gfm_defaults(),
355            Flavor::CommonMark => Self::commonmark_defaults(),
356            Flavor::MultiMarkdown => Self::multimarkdown_defaults(),
357        }
358    }
359
360    fn pandoc_defaults() -> Self {
361        Self {
362            // Block-level - enabled by default in Pandoc
363            auto_identifiers: true,
364            blank_before_blockquote: true,
365            blank_before_header: true,
366            gfm_auto_identifiers: false,
367            header_attributes: true,
368            implicit_header_references: true,
369
370            // Lists
371            definition_lists: true,
372            example_lists: true,
373            fancy_lists: true,
374            lists_without_preceding_blankline: false,
375            startnum: true,
376            task_lists: true,
377
378            // Code
379            backtick_code_blocks: true,
380            executable_code: false,
381            rmarkdown_inline_code: false,
382            quarto_inline_code: false,
383            fenced_code_attributes: true,
384            fenced_code_blocks: true,
385            inline_code_attributes: true,
386
387            // Tables
388            grid_tables: true,
389            multiline_tables: true,
390            pipe_tables: true,
391            simple_tables: true,
392            table_captions: true,
393
394            // Divs
395            fenced_divs: true,
396            native_divs: true,
397
398            // Other blocks
399            line_blocks: true,
400
401            // Inline
402            intraword_underscores: true,
403            strikeout: true,
404            subscript: true,
405            superscript: true,
406
407            // Links
408            autolinks: true,
409            inline_links: true,
410            link_attributes: true,
411            reference_links: true,
412            shortcut_reference_links: true,
413
414            // Images
415            implicit_figures: true,
416            inline_images: true,
417
418            // Math
419            tex_math_dollars: true,
420            tex_math_double_backslash: false,
421            tex_math_gfm: false,
422            tex_math_single_backslash: false,
423
424            // Footnotes
425            footnotes: true,
426            inline_footnotes: true,
427
428            // Citations
429            citations: true,
430
431            // Spans
432            bracketed_spans: true,
433            native_spans: true,
434
435            // Metadata
436            mmd_title_block: false,
437            pandoc_title_block: true,
438            yaml_metadata_block: true,
439
440            // Raw
441            markdown_in_html_blocks: false,
442            raw_attribute: true,
443            raw_html: true,
444            raw_tex: true,
445
446            // Escapes
447            all_symbols_escapable: true,
448            escaped_line_breaks: true,
449
450            // Non-default
451            alerts: false,
452            autolink_bare_uris: false,
453            emoji: false,
454            four_space_rule: false,
455            hard_line_breaks: false,
456            mark: false,
457            mmd_header_identifiers: false,
458            mmd_link_attributes: false,
459
460            // Quarto/Bookdown-specific
461            bookdown_references: false,
462            bookdown_equation_references: false,
463            quarto_callouts: false,
464            quarto_crossrefs: false,
465            quarto_shortcodes: false,
466        }
467    }
468
469    fn quarto_defaults() -> Self {
470        let mut ext = Self::pandoc_defaults();
471
472        ext.executable_code = true;
473        ext.rmarkdown_inline_code = true;
474        ext.quarto_inline_code = true;
475        ext.quarto_callouts = true;
476        ext.quarto_crossrefs = true;
477        ext.quarto_shortcodes = true;
478
479        ext
480    }
481
482    fn rmarkdown_defaults() -> Self {
483        let mut ext = Self::pandoc_defaults();
484
485        ext.bookdown_references = true;
486        ext.bookdown_equation_references = true;
487        ext.executable_code = true;
488        ext.rmarkdown_inline_code = true;
489        ext.quarto_inline_code = false;
490        ext.tex_math_dollars = true;
491        ext.tex_math_single_backslash = true;
492
493        ext
494    }
495
496    fn gfm_defaults() -> Self {
497        let mut ext = Self::none_defaults();
498
499        ext.alerts = true;
500        ext.auto_identifiers = true;
501        ext.autolink_bare_uris = true;
502        ext.autolinks = true;
503        ext.backtick_code_blocks = true;
504        ext.emoji = true;
505        ext.fenced_code_blocks = true;
506        ext.footnotes = true;
507        ext.gfm_auto_identifiers = true;
508        ext.inline_links = true;
509        ext.pipe_tables = true;
510        ext.raw_html = true;
511        ext.reference_links = true;
512        ext.shortcut_reference_links = true;
513        ext.strikeout = true;
514        ext.task_lists = true;
515        ext.tex_math_dollars = true;
516        ext.tex_math_gfm = true;
517        ext.yaml_metadata_block = true;
518
519        ext
520    }
521
522    fn commonmark_defaults() -> Self {
523        let mut ext = Self::none_defaults();
524        // CommonMark's core grammar is what pandoc's commonmark reader treats
525        // as "not extensions" — they're built into the reader. Panache's
526        // parser still gates each construct on its extension flag, so we have
527        // to enable the CommonMark-mandatory ones explicitly here.
528        //
529        // Notably absent: `all_symbols_escapable`. CommonMark only allows
530        // backslash escapes of ASCII punctuation, and panache's
531        // `all_symbols_escapable` flag widens that to any character — so it
532        // must stay off for CommonMark.
533        ext.autolinks = true;
534        ext.backtick_code_blocks = true;
535        ext.escaped_line_breaks = true;
536        ext.fenced_code_blocks = true;
537        ext.inline_images = true;
538        ext.inline_links = true;
539        ext.intraword_underscores = true;
540        ext.raw_html = true;
541        ext.reference_links = true;
542        ext.shortcut_reference_links = true;
543        ext
544    }
545
546    fn multimarkdown_defaults() -> Self {
547        let mut ext = Self::none_defaults();
548
549        ext.all_symbols_escapable = true;
550        ext.auto_identifiers = true;
551        ext.backtick_code_blocks = true;
552        ext.definition_lists = true;
553        ext.footnotes = true;
554        ext.implicit_figures = true;
555        ext.implicit_header_references = true;
556        ext.intraword_underscores = true;
557        ext.mmd_header_identifiers = true;
558        ext.mmd_link_attributes = true;
559        ext.mmd_title_block = true;
560        ext.pipe_tables = true;
561        ext.raw_attribute = true;
562        ext.raw_html = true;
563        ext.reference_links = true;
564        ext.shortcut_reference_links = true;
565        ext.subscript = true;
566        ext.superscript = true;
567        ext.tex_math_dollars = true;
568        ext.tex_math_double_backslash = true;
569
570        ext
571    }
572
573    /// Merge user-specified extension overrides with flavor defaults.
574    ///
575    /// This is used to support partial extension overrides in config files.
576    /// For example, if a user specifies `flavor = "quarto"` and then sets
577    /// `[extensions] quarto-crossrefs = false`, we want all other extensions
578    /// to use Quarto defaults, not Pandoc defaults.
579    ///
580    /// # Arguments
581    /// * `user_overrides` - Map of extension names to their user-specified values
582    /// * `flavor` - The flavor to use for default values
583    ///
584    /// # Returns
585    /// A new Extensions struct with flavor defaults merged with user overrides
586    pub fn merge_with_flavor(user_overrides: HashMap<String, bool>, flavor: Flavor) -> Self {
587        let defaults = Self::for_flavor(flavor);
588        Self::merge_overrides(defaults, user_overrides)
589    }
590
591    fn merge_overrides(mut base: Extensions, user_overrides: HashMap<String, bool>) -> Self {
592        for (key, value) in user_overrides {
593            let normalized_key = key.replace('_', "-");
594            match normalized_key.as_str() {
595                "blank-before-header" => base.blank_before_header = value,
596                "header-attributes" => base.header_attributes = value,
597                "auto-identifiers" => base.auto_identifiers = value,
598                "gfm-auto-identifiers" => base.gfm_auto_identifiers = value,
599                "implicit-header-references" => base.implicit_header_references = value,
600                "blank-before-blockquote" => base.blank_before_blockquote = value,
601                "fancy-lists" => base.fancy_lists = value,
602                "startnum" => base.startnum = value,
603                "example-lists" => base.example_lists = value,
604                "task-lists" => base.task_lists = value,
605                "definition-lists" => base.definition_lists = value,
606                "lists-without-preceding-blankline" => {
607                    base.lists_without_preceding_blankline = value
608                }
609                "four-space-rule" => base.four_space_rule = value,
610                "backtick-code-blocks" => base.backtick_code_blocks = value,
611                "fenced-code-blocks" => base.fenced_code_blocks = value,
612                "fenced-code-attributes" => base.fenced_code_attributes = value,
613                "executable-code" => base.executable_code = value,
614                "rmarkdown-inline-code" => base.rmarkdown_inline_code = value,
615                "quarto-inline-code" => base.quarto_inline_code = value,
616                "inline-code-attributes" => base.inline_code_attributes = value,
617                "simple-tables" => base.simple_tables = value,
618                "multiline-tables" => base.multiline_tables = value,
619                "grid-tables" => base.grid_tables = value,
620                "pipe-tables" => base.pipe_tables = value,
621                "table-captions" => base.table_captions = value,
622                "fenced-divs" => base.fenced_divs = value,
623                "native-divs" => base.native_divs = value,
624                "line-blocks" => base.line_blocks = value,
625                "intraword-underscores" => base.intraword_underscores = value,
626                "strikeout" => base.strikeout = value,
627                "superscript" => base.superscript = value,
628                "subscript" => base.subscript = value,
629                "inline-links" => base.inline_links = value,
630                "reference-links" => base.reference_links = value,
631                "shortcut-reference-links" => base.shortcut_reference_links = value,
632                "link-attributes" => base.link_attributes = value,
633                "autolinks" => base.autolinks = value,
634                "inline-images" => base.inline_images = value,
635                "implicit-figures" => base.implicit_figures = value,
636                "tex-math-dollars" => base.tex_math_dollars = value,
637                "tex-math-gfm" => base.tex_math_gfm = value,
638                "tex-math-single-backslash" => base.tex_math_single_backslash = value,
639                "tex-math-double-backslash" => base.tex_math_double_backslash = value,
640                "inline-footnotes" => base.inline_footnotes = value,
641                "footnotes" => base.footnotes = value,
642                "citations" => base.citations = value,
643                "bracketed-spans" => base.bracketed_spans = value,
644                "native-spans" => base.native_spans = value,
645                "yaml-metadata-block" => base.yaml_metadata_block = value,
646                "pandoc-title-block" => base.pandoc_title_block = value,
647                "mmd-title-block" => base.mmd_title_block = value,
648                "raw-html" => base.raw_html = value,
649                "markdown-in-html-blocks" => base.markdown_in_html_blocks = value,
650                "raw-tex" => base.raw_tex = value,
651                "raw-attribute" => base.raw_attribute = value,
652                "all-symbols-escapable" => base.all_symbols_escapable = value,
653                "escaped-line-breaks" => base.escaped_line_breaks = value,
654                "autolink-bare-uris" => base.autolink_bare_uris = value,
655                "hard-line-breaks" => base.hard_line_breaks = value,
656                "mmd-header-identifiers" => base.mmd_header_identifiers = value,
657                "mmd-link-attributes" => base.mmd_link_attributes = value,
658                "alerts" => base.alerts = value,
659                "emoji" => base.emoji = value,
660                "mark" => base.mark = value,
661                "quarto-callouts" => base.quarto_callouts = value,
662                "quarto-crossrefs" => base.quarto_crossrefs = value,
663                "quarto-shortcodes" => base.quarto_shortcodes = value,
664                "bookdown-references" => base.bookdown_references = value,
665                "bookdown-equation-references" => base.bookdown_equation_references = value,
666                _ => {}
667            }
668        }
669        base
670    }
671}
672
#[cfg(test)]
mod tests {
    use super::{Extensions, Flavor};
    use std::collections::HashMap;

    /// Builds an override map from `(name, value)` pairs.
    fn overrides(entries: &[(&str, bool)]) -> HashMap<String, bool> {
        entries
            .iter()
            .map(|&(name, enabled)| (name.to_string(), enabled))
            .collect()
    }

    #[test]
    fn merge_with_flavor_keeps_known_extension_overrides() {
        let merged = Extensions::merge_with_flavor(
            overrides(&[("intraword-underscores", false)]),
            Flavor::Pandoc,
        );
        assert!(!merged.intraword_underscores);
    }

    #[test]
    fn merge_with_flavor_ignores_unknown_extension_overrides() {
        let merged = Extensions::merge_with_flavor(
            overrides(&[("smart", true), ("smart-quotes", true)]),
            Flavor::Gfm,
        );
        assert!(merged.strikeout, "known defaults should remain intact");
    }

    #[test]
    fn lists_without_preceding_blankline_defaults_false_for_pandoc_and_gfm() {
        let pandoc = Extensions::for_flavor(Flavor::Pandoc);
        let gfm = Extensions::for_flavor(Flavor::Gfm);
        assert!(!pandoc.lists_without_preceding_blankline);
        assert!(!gfm.lists_without_preceding_blankline);
    }

    #[test]
    fn merge_with_flavor_accepts_lists_without_preceding_blankline_override() {
        let merged = Extensions::merge_with_flavor(
            overrides(&[("lists-without-preceding-blankline", true)]),
            Flavor::Pandoc,
        );
        assert!(merged.lists_without_preceding_blankline);
    }

    #[test]
    fn four_space_rule_defaults_off_for_every_flavor() {
        let every_flavor = [
            Flavor::Pandoc,
            Flavor::Quarto,
            Flavor::RMarkdown,
            Flavor::Gfm,
            Flavor::CommonMark,
            Flavor::MultiMarkdown,
        ];
        for &flavor in &every_flavor {
            let defaults = Extensions::for_flavor(flavor);
            assert!(
                !defaults.four_space_rule,
                "four_space_rule should be off by default for {flavor:?}"
            );
        }
    }

    #[test]
    fn merge_with_flavor_accepts_four_space_rule_override() {
        let merged =
            Extensions::merge_with_flavor(overrides(&[("four-space-rule", true)]), Flavor::Pandoc);
        assert!(merged.four_space_rule);
    }
}
734
/// Which Pandoc release to imitate where Pandoc's handling of ambiguous
/// syntax differs between versions.
///
/// The default is [`PandocCompat::V3_9`]. Use [`PandocCompat::effective`] to
/// turn [`PandocCompat::Latest`] into the concrete version it stands for.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum PandocCompat {
    /// Alias for Panache's pinned newest supported Pandoc-compat behavior.
    ///
    /// This is intentionally NOT "floating upstream latest". It resolves to
    /// a concrete version that Panache has verified, and is bumped manually.
    #[cfg_attr(feature = "serde", serde(rename = "latest"))]
    Latest,
    /// Match Pandoc 3.7 behavior for ambiguous syntax edge cases.
    #[cfg_attr(
        feature = "serde",
        serde(rename = "3.7", alias = "3-7", alias = "v3.7", alias = "v3-7")
    )]
    V3_7,
    /// Match Pandoc 3.9 behavior for ambiguous syntax edge cases.
    #[default]
    #[cfg_attr(
        feature = "serde",
        serde(rename = "3.9", alias = "3-9", alias = "v3.9", alias = "v3-9")
    )]
    V3_9,
}

impl PandocCompat {
    /// The concrete version that [`PandocCompat::Latest`] currently stands for.
    pub const PINNED_LATEST: Self = Self::V3_9;

    /// Resolves the `Latest` alias to [`Self::PINNED_LATEST`]; any concrete
    /// version is returned unchanged.
    pub fn effective(self) -> Self {
        if self == Self::Latest {
            Self::PINNED_LATEST
        } else {
            self
        }
    }
}
770
/// Parser dialect: the inline tokenization rule set the parser runs on.
///
/// This is a separate axis from [`Flavor`]. `Flavor` is what the user picks
/// (Pandoc, Quarto, GFM, ...) and it chooses extension defaults, whereas
/// `Dialect` names the structural parser identity. Several flavors map onto
/// one dialect: Quarto and RMarkdown both use `Pandoc`, while CommonMark and
/// GFM both use `CommonMark`.
///
/// Branch on this when parser behavior differs fundamentally between the
/// dialect families (for instance, how an unmatched backtick run is handled).
/// Feature toggles that vary per flavor still live on [`Extensions`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
#[cfg_attr(
    feature = "serde",
    derive(serde::Serialize, serde::Deserialize),
    serde(rename_all = "kebab-case")
)]
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
pub enum Dialect {
    /// The Pandoc-markdown family; the default for Pandoc, Quarto, RMarkdown
    /// and MultiMarkdown.
    #[default]
    Pandoc,
    /// The CommonMark family; the default for CommonMark and GFM.
    CommonMark,
}
793
794impl Dialect {
795    /// Default dialect for a given user-facing flavor.
796    pub fn for_flavor(flavor: Flavor) -> Self {
797        match flavor {
798            Flavor::CommonMark | Flavor::Gfm => Dialect::CommonMark,
799            Flavor::Pandoc | Flavor::Quarto | Flavor::RMarkdown | Flavor::MultiMarkdown => {
800                Dialect::Pandoc
801            }
802        }
803    }
804}
805
806#[derive(Debug, Clone)]
807#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
808#[cfg_attr(feature = "serde", serde(default, rename_all = "kebab-case"))]
809#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
810pub struct ParserOptions {
811    pub flavor: Flavor,
812    pub dialect: Dialect,
813    pub extensions: Extensions,
814    /// Compatibility target for ambiguous Pandoc behavior.
815    pub pandoc_compat: PandocCompat,
816    /// Document-level reference link label set, populated by the
817    /// top-level `parse()` function when running CommonMark dialect and
818    /// consulted by inline parsing's bracket resolution pass. `None`
819    /// means "not pre-computed"; the inline pipeline then treats every
820    /// reference-shaped bracket pair conservatively (current behavior),
821    /// which is correct for the Pandoc dialect and a graceful
822    /// degradation for embedded use cases that bypass `parse()`.
823    ///
824    /// Skipped by serde so config files don't try to (de)serialize a
825    /// runtime cache.
826    #[cfg_attr(feature = "serde", serde(skip))]
827    pub refdef_labels: Option<Arc<HashSet<String>>>,
828}
829
830impl Default for ParserOptions {
831    fn default() -> Self {
832        let flavor = Flavor::default();
833        Self {
834            flavor,
835            dialect: Dialect::for_flavor(flavor),
836            extensions: Extensions::for_flavor(flavor),
837            pandoc_compat: PandocCompat::default(),
838            refdef_labels: None,
839        }
840    }
841}
842
843impl ParserOptions {
844    pub fn effective_pandoc_compat(&self) -> PandocCompat {
845        self.pandoc_compat.effective()
846    }
847}
848
#[cfg(feature = "schema")]
impl schemars::JsonSchema for Flavor {
    /// Name under which this schema is registered.
    fn schema_name() -> std::borrow::Cow<'static, str> {
        std::borrow::Cow::Borrowed("Flavor")
    }

    /// Hand-written string-enum schema for [`Flavor`].
    fn json_schema(_generator: &mut schemars::SchemaGenerator) -> schemars::Schema {
        // The list carries the serde aliases too, so that the schema admits
        // every spelling the parser admits (for example `commonmark` next to
        // the canonical kebab-case `common-mark`).
        let schema = schemars::json_schema!({
            "type": "string",
            "description": "Markdown flavor to parse and format against.",
            "enum": [
                "pandoc",
                "quarto",
                "rmarkdown",
                "gfm",
                "common-mark",
                "commonmark",
                "multimarkdown"
            ]
        });
        schema
    }
}
874
#[cfg(feature = "schema")]
impl schemars::JsonSchema for PandocCompat {
    /// Name under which this schema is registered.
    fn schema_name() -> std::borrow::Cow<'static, str> {
        std::borrow::Cow::Borrowed("PandocCompat")
    }

    /// Hand-written string-enum schema for [`PandocCompat`].
    ///
    /// Lists `latest` plus the canonical name and every serde alias of each
    /// versioned variant.
    fn json_schema(_generator: &mut schemars::SchemaGenerator) -> schemars::Schema {
        let schema = schemars::json_schema!({
            "type": "string",
            "description": "Compatibility target for ambiguous Pandoc behavior.",
            "enum": [
                "latest",
                "3.7",
                "3-7",
                "v3.7",
                "v3-7",
                "3.9",
                "3-9",
                "v3.9",
                "v3-9"
            ]
        });
        schema
    }
}