Skip to main content

panache_parser/
options.rs

1use std::collections::{HashMap, HashSet};
2use std::sync::Arc;
3
/// Markdown dialect that the parser and formatter should target.
///
/// The chosen flavor decides which extensions are switched on by default
/// (see [`Extensions::for_flavor`]).
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
#[cfg_attr(
    feature = "serde",
    derive(serde::Serialize, serde::Deserialize),
    serde(rename_all = "kebab-case")
)]
pub enum Flavor {
    /// Pandoc Markdown with Pandoc's default extensions. This is the
    /// default flavor.
    #[default]
    Pandoc,
    /// Quarto: Pandoc Markdown plus the Quarto-specific extensions.
    Quarto,
    /// R Markdown: Pandoc Markdown plus the R-specific extensions.
    /// Serialized as `rmarkdown` rather than the kebab-case default.
    #[cfg_attr(feature = "serde", serde(rename = "rmarkdown"))]
    RMarkdown,
    /// GitHub Flavored Markdown.
    Gfm,
    /// CommonMark. `commonmark` is accepted as an alternative spelling when
    /// deserializing.
    #[cfg_attr(feature = "serde", serde(alias = "commonmark"))]
    CommonMark,
    /// MultiMarkdown. Serialized as `multimarkdown` rather than the
    /// kebab-case default.
    #[cfg_attr(feature = "serde", serde(rename = "multimarkdown"))]
    MultiMarkdown,
}
27
/// Pandoc/Markdown extensions configuration.
///
/// Every field is an on/off switch for one Pandoc (or Panache-specific)
/// extension. Fields tagged `[NON-DEFAULT]` are switched off in the Pandoc
/// flavor defaults.
///
/// With the `serde` feature the canonical key for each field is its
/// kebab-case name; every multi-word field additionally accepts its
/// snake_case spelling through an alias. Missing keys fall back to
/// `Extensions::default()`.
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "serde", serde(default))]
#[cfg_attr(feature = "serde", serde(rename_all = "kebab-case"))]
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
pub struct Extensions {
    // ===== Block-level extensions =====

    // Headings
    /// Require blank line before headers (default: enabled)
    #[cfg_attr(feature = "serde", serde(alias = "blank_before_header"))]
    pub blank_before_header: bool,
    /// Full attribute syntax on headers {#id .class key=value}
    #[cfg_attr(feature = "serde", serde(alias = "header_attributes"))]
    pub header_attributes: bool,
    /// Auto-generate identifiers from headings
    #[cfg_attr(feature = "serde", serde(alias = "auto_identifiers"))]
    pub auto_identifiers: bool,
    /// Use GitHub's algorithm for auto-generated heading identifiers
    #[cfg_attr(feature = "serde", serde(alias = "gfm_auto_identifiers"))]
    pub gfm_auto_identifiers: bool,
    /// Implicit header references ([Heading] links to header)
    #[cfg_attr(feature = "serde", serde(alias = "implicit_header_references"))]
    pub implicit_header_references: bool,

    // Block quotes
    /// Require blank line before blockquotes (default: enabled)
    #[cfg_attr(feature = "serde", serde(alias = "blank_before_blockquote"))]
    pub blank_before_blockquote: bool,

    // Lists
    /// Fancy list markers (roman numerals, letters, etc.)
    #[cfg_attr(feature = "serde", serde(alias = "fancy_lists"))]
    pub fancy_lists: bool,
    /// Start ordered lists at arbitrary numbers
    pub startnum: bool,
    /// Example lists with (@) markers
    #[cfg_attr(feature = "serde", serde(alias = "example_lists"))]
    pub example_lists: bool,
    /// GitHub-style task lists - [ ] and - [x]
    #[cfg_attr(feature = "serde", serde(alias = "task_lists"))]
    pub task_lists: bool,
    /// Term/definition syntax
    #[cfg_attr(feature = "serde", serde(alias = "definition_lists"))]
    pub definition_lists: bool,
    /// Allow lists without a preceding blank line
    #[cfg_attr(feature = "serde", serde(alias = "lists_without_preceding_blankline"))]
    pub lists_without_preceding_blankline: bool,
    /// [NON-DEFAULT] Pandoc <= 2.0 list semantics: continuation paragraphs and
    /// nested lists require four-space (one tab-width) indentation
    #[cfg_attr(feature = "serde", serde(alias = "four_space_rule"))]
    pub four_space_rule: bool,

    // Code blocks
    /// Fenced code blocks with backticks
    #[cfg_attr(feature = "serde", serde(alias = "backtick_code_blocks"))]
    pub backtick_code_blocks: bool,
    /// Fenced code blocks with tildes
    #[cfg_attr(feature = "serde", serde(alias = "fenced_code_blocks"))]
    pub fenced_code_blocks: bool,
    /// Attributes on fenced code blocks {.language #id}
    #[cfg_attr(feature = "serde", serde(alias = "fenced_code_attributes"))]
    pub fenced_code_attributes: bool,
    /// Executable code syntax (currently fenced chunks like ```{r} / ```{python})
    #[cfg_attr(feature = "serde", serde(alias = "executable_code"))]
    pub executable_code: bool,
    /// R Markdown inline executable code (`...`r ...)
    #[cfg_attr(feature = "serde", serde(alias = "rmarkdown_inline_code"))]
    pub rmarkdown_inline_code: bool,
    /// Quarto inline executable code (`...`{r} ...)
    #[cfg_attr(feature = "serde", serde(alias = "quarto_inline_code"))]
    pub quarto_inline_code: bool,
    /// Attributes on inline code
    #[cfg_attr(feature = "serde", serde(alias = "inline_code_attributes"))]
    pub inline_code_attributes: bool,

    // Tables
    /// Simple table syntax
    #[cfg_attr(feature = "serde", serde(alias = "simple_tables"))]
    pub simple_tables: bool,
    /// Multiline cell content in tables
    #[cfg_attr(feature = "serde", serde(alias = "multiline_tables"))]
    pub multiline_tables: bool,
    /// Grid-style tables
    #[cfg_attr(feature = "serde", serde(alias = "grid_tables"))]
    pub grid_tables: bool,
    /// Pipe tables (GitHub/PHP Markdown style)
    #[cfg_attr(feature = "serde", serde(alias = "pipe_tables"))]
    pub pipe_tables: bool,
    /// Table captions
    #[cfg_attr(feature = "serde", serde(alias = "table_captions"))]
    pub table_captions: bool,

    // Divs
    /// Fenced divs ::: {.class}
    #[cfg_attr(feature = "serde", serde(alias = "fenced_divs"))]
    pub fenced_divs: bool,
    /// HTML <div> elements
    #[cfg_attr(feature = "serde", serde(alias = "native_divs"))]
    pub native_divs: bool,

    // Other block elements
    /// Line blocks for poetry | prefix
    #[cfg_attr(feature = "serde", serde(alias = "line_blocks"))]
    pub line_blocks: bool,

    // ===== Inline elements =====

    // Emphasis
    /// Underscores don't trigger emphasis in snake_case
    #[cfg_attr(feature = "serde", serde(alias = "intraword_underscores"))]
    pub intraword_underscores: bool,
    /// Strikethrough ~~text~~
    pub strikeout: bool,
    /// Superscript ^super^
    pub superscript: bool,
    /// Subscript ~sub~
    pub subscript: bool,

    // Links
    /// Inline links [text](url)
    #[cfg_attr(feature = "serde", serde(alias = "inline_links"))]
    pub inline_links: bool,
    /// Reference links [text][ref]
    #[cfg_attr(feature = "serde", serde(alias = "reference_links"))]
    pub reference_links: bool,
    /// Shortcut reference links [ref] without second []
    #[cfg_attr(feature = "serde", serde(alias = "shortcut_reference_links"))]
    pub shortcut_reference_links: bool,
    /// Attributes on links [text](url){.class}
    #[cfg_attr(feature = "serde", serde(alias = "link_attributes"))]
    pub link_attributes: bool,
    /// Automatic links <http://example.com>
    pub autolinks: bool,

    // Images
    /// Inline images ![alt](url)
    #[cfg_attr(feature = "serde", serde(alias = "inline_images"))]
    pub inline_images: bool,
    /// Paragraph with just image becomes figure
    #[cfg_attr(feature = "serde", serde(alias = "implicit_figures"))]
    pub implicit_figures: bool,

    // Math
    /// Dollar-delimited math $x$ and $$equation$$
    #[cfg_attr(feature = "serde", serde(alias = "tex_math_dollars"))]
    pub tex_math_dollars: bool,
    /// [NON-DEFAULT] GFM math: inline $`...`$ and fenced ``` math blocks
    #[cfg_attr(feature = "serde", serde(alias = "tex_math_gfm"))]
    pub tex_math_gfm: bool,
    /// [NON-DEFAULT] Single backslash math \(...\) and \[...\] (RMarkdown default)
    #[cfg_attr(feature = "serde", serde(alias = "tex_math_single_backslash"))]
    pub tex_math_single_backslash: bool,
    /// [NON-DEFAULT] Double backslash math \\(...\\) and \\[...\\]
    #[cfg_attr(feature = "serde", serde(alias = "tex_math_double_backslash"))]
    pub tex_math_double_backslash: bool,

    // Footnotes
    /// Inline footnotes ^[text]
    #[cfg_attr(feature = "serde", serde(alias = "inline_footnotes"))]
    pub inline_footnotes: bool,
    /// Reference footnotes `[^1]` (requires footnote parsing)
    pub footnotes: bool,

    // Citations
    /// Citation syntax [@cite]
    pub citations: bool,

    // Spans
    /// Bracketed spans [text]{.class}
    #[cfg_attr(feature = "serde", serde(alias = "bracketed_spans"))]
    pub bracketed_spans: bool,
    /// HTML <span> elements
    #[cfg_attr(feature = "serde", serde(alias = "native_spans"))]
    pub native_spans: bool,

    // ===== Metadata =====
    /// YAML metadata block
    #[cfg_attr(feature = "serde", serde(alias = "yaml_metadata_block"))]
    pub yaml_metadata_block: bool,
    /// Pandoc title block (Title/Author/Date)
    #[cfg_attr(feature = "serde", serde(alias = "pandoc_title_block"))]
    pub pandoc_title_block: bool,
    /// [NON-DEFAULT] MultiMarkdown metadata/title block (Key: Value ...)
    #[cfg_attr(feature = "serde", serde(alias = "mmd_title_block"))]
    pub mmd_title_block: bool,

    // ===== Raw content =====
    /// Raw HTML blocks and inline
    #[cfg_attr(feature = "serde", serde(alias = "raw_html"))]
    pub raw_html: bool,
    /// Markdown inside HTML blocks
    #[cfg_attr(feature = "serde", serde(alias = "markdown_in_html_blocks"))]
    pub markdown_in_html_blocks: bool,
    /// LaTeX commands and environments
    #[cfg_attr(feature = "serde", serde(alias = "raw_tex"))]
    pub raw_tex: bool,
    /// Generic raw blocks with {=format} syntax
    #[cfg_attr(feature = "serde", serde(alias = "raw_attribute"))]
    pub raw_attribute: bool,

    // ===== Escapes and special characters =====
    /// Backslash escapes any symbol
    #[cfg_attr(feature = "serde", serde(alias = "all_symbols_escapable"))]
    pub all_symbols_escapable: bool,
    /// Backslash at line end = hard line break
    #[cfg_attr(feature = "serde", serde(alias = "escaped_line_breaks"))]
    pub escaped_line_breaks: bool,

    // ===== NON-DEFAULT EXTENSIONS =====
    // These are disabled by default in Pandoc
    /// [NON-DEFAULT] Bare URLs become links
    #[cfg_attr(feature = "serde", serde(alias = "autolink_bare_uris"))]
    pub autolink_bare_uris: bool,
    /// [NON-DEFAULT] Newline = <br>
    #[cfg_attr(feature = "serde", serde(alias = "hard_line_breaks"))]
    pub hard_line_breaks: bool,
    /// [NON-DEFAULT] Ignore soft breaks between two East Asian wide characters
    #[cfg_attr(feature = "serde", serde(alias = "east_asian_line_breaks"))]
    pub east_asian_line_breaks: bool,
    /// [NON-DEFAULT] MultiMarkdown style heading identifiers [my-id]
    #[cfg_attr(feature = "serde", serde(alias = "mmd_header_identifiers"))]
    pub mmd_header_identifiers: bool,
    /// [NON-DEFAULT] MultiMarkdown key=value attributes on reference defs
    #[cfg_attr(feature = "serde", serde(alias = "mmd_link_attributes"))]
    pub mmd_link_attributes: bool,
    /// [NON-DEFAULT] GitHub/CommonMark alerts in blockquotes (`> [!NOTE]`)
    pub alerts: bool,
    /// [NON-DEFAULT] :emoji: syntax
    pub emoji: bool,
    /// [NON-DEFAULT] Highlighted ==text==
    pub mark: bool,
    /// [NON-DEFAULT] Pandoc wikilinks with title after pipe: `[[url|title]]`
    #[cfg_attr(feature = "serde", serde(alias = "wikilinks_title_after_pipe"))]
    pub wikilinks_title_after_pipe: bool,
    /// [NON-DEFAULT] Pandoc wikilinks with title before pipe: `[[title|url]]`
    #[cfg_attr(feature = "serde", serde(alias = "wikilinks_title_before_pipe"))]
    pub wikilinks_title_before_pipe: bool,
    /// [NON-DEFAULT] Allow whitespace between reference link brackets: `[foo] [bar]`
    #[cfg_attr(feature = "serde", serde(alias = "spaced_reference_links"))]
    pub spaced_reference_links: bool,

    // ===== Quarto-specific extensions =====
    /// Quarto callout blocks (.callout-note, etc.)
    #[cfg_attr(feature = "serde", serde(alias = "quarto_callouts"))]
    pub quarto_callouts: bool,
    /// Quarto cross-references @fig-id, @tbl-id
    #[cfg_attr(feature = "serde", serde(alias = "quarto_crossrefs"))]
    pub quarto_crossrefs: bool,
    /// Quarto shortcodes {{< name args >}}
    #[cfg_attr(feature = "serde", serde(alias = "quarto_shortcodes"))]
    pub quarto_shortcodes: bool,
    /// Bookdown references \@ref(label) and (\#label)
    #[cfg_attr(feature = "serde", serde(alias = "bookdown_references"))]
    pub bookdown_references: bool,
    /// Bookdown equation references in LaTeX math blocks (\#eq:label)
    #[cfg_attr(feature = "serde", serde(alias = "bookdown_equation_references"))]
    pub bookdown_equation_references: bool,
}
278
279impl Default for Extensions {
280    fn default() -> Self {
281        Self::for_flavor(Flavor::default())
282    }
283}
284
285impl Extensions {
286    fn none_defaults() -> Self {
287        Self {
288            alerts: false,
289            all_symbols_escapable: false,
290            auto_identifiers: false,
291            autolink_bare_uris: false,
292            autolinks: false,
293            backtick_code_blocks: false,
294            blank_before_blockquote: false,
295            blank_before_header: false,
296            bookdown_references: false,
297            bookdown_equation_references: false,
298            bracketed_spans: false,
299            citations: false,
300            definition_lists: false,
301            lists_without_preceding_blankline: false,
302            emoji: false,
303            escaped_line_breaks: false,
304            example_lists: false,
305            executable_code: false,
306            rmarkdown_inline_code: false,
307            quarto_inline_code: false,
308            fancy_lists: false,
309            fenced_code_attributes: false,
310            fenced_code_blocks: false,
311            fenced_divs: false,
312            footnotes: false,
313            four_space_rule: false,
314            gfm_auto_identifiers: false,
315            grid_tables: false,
316            east_asian_line_breaks: false,
317            hard_line_breaks: false,
318            header_attributes: false,
319            implicit_figures: false,
320            implicit_header_references: false,
321            inline_code_attributes: false,
322            inline_footnotes: false,
323            inline_images: false,
324            inline_links: false,
325            intraword_underscores: false,
326            line_blocks: false,
327            link_attributes: false,
328            mark: false,
329            markdown_in_html_blocks: false,
330            mmd_header_identifiers: false,
331            mmd_link_attributes: false,
332            mmd_title_block: false,
333            multiline_tables: false,
334            native_divs: false,
335            native_spans: false,
336            pandoc_title_block: false,
337            pipe_tables: false,
338            quarto_callouts: false,
339            quarto_crossrefs: false,
340            quarto_shortcodes: false,
341            raw_attribute: false,
342            raw_html: false,
343            raw_tex: false,
344            reference_links: false,
345            shortcut_reference_links: false,
346            simple_tables: false,
347            startnum: false,
348            strikeout: false,
349            subscript: false,
350            superscript: false,
351            table_captions: false,
352            task_lists: false,
353            tex_math_dollars: false,
354            tex_math_double_backslash: false,
355            tex_math_gfm: false,
356            tex_math_single_backslash: false,
357            wikilinks_title_after_pipe: false,
358            wikilinks_title_before_pipe: false,
359            spaced_reference_links: false,
360            yaml_metadata_block: false,
361        }
362    }
363
364    /// Get the default extension set for a given flavor.
365    pub fn for_flavor(flavor: Flavor) -> Self {
366        match flavor {
367            Flavor::Pandoc => Self::pandoc_defaults(),
368            Flavor::Quarto => Self::quarto_defaults(),
369            Flavor::RMarkdown => Self::rmarkdown_defaults(),
370            Flavor::Gfm => Self::gfm_defaults(),
371            Flavor::CommonMark => Self::commonmark_defaults(),
372            Flavor::MultiMarkdown => Self::multimarkdown_defaults(),
373        }
374    }
375
376    fn pandoc_defaults() -> Self {
377        Self {
378            // Block-level - enabled by default in Pandoc
379            auto_identifiers: true,
380            blank_before_blockquote: true,
381            blank_before_header: true,
382            gfm_auto_identifiers: false,
383            header_attributes: true,
384            implicit_header_references: true,
385
386            // Lists
387            definition_lists: true,
388            example_lists: true,
389            fancy_lists: true,
390            lists_without_preceding_blankline: false,
391            startnum: true,
392            task_lists: true,
393
394            // Code
395            backtick_code_blocks: true,
396            executable_code: false,
397            rmarkdown_inline_code: false,
398            quarto_inline_code: false,
399            fenced_code_attributes: true,
400            fenced_code_blocks: true,
401            inline_code_attributes: true,
402
403            // Tables
404            grid_tables: true,
405            multiline_tables: true,
406            pipe_tables: true,
407            simple_tables: true,
408            table_captions: true,
409
410            // Divs
411            fenced_divs: true,
412            native_divs: true,
413
414            // Other blocks
415            line_blocks: true,
416
417            // Inline
418            intraword_underscores: true,
419            strikeout: true,
420            subscript: true,
421            superscript: true,
422
423            // Links
424            autolinks: true,
425            inline_links: true,
426            link_attributes: true,
427            reference_links: true,
428            shortcut_reference_links: true,
429
430            // Images
431            implicit_figures: true,
432            inline_images: true,
433
434            // Math
435            tex_math_dollars: true,
436            tex_math_double_backslash: false,
437            tex_math_gfm: false,
438            tex_math_single_backslash: false,
439
440            // Footnotes
441            footnotes: true,
442            inline_footnotes: true,
443
444            // Citations
445            citations: true,
446
447            // Spans
448            bracketed_spans: true,
449            native_spans: true,
450
451            // Metadata
452            mmd_title_block: false,
453            pandoc_title_block: true,
454            yaml_metadata_block: true,
455
456            // Raw
457            markdown_in_html_blocks: false,
458            raw_attribute: true,
459            raw_html: true,
460            raw_tex: true,
461
462            // Escapes
463            all_symbols_escapable: true,
464            escaped_line_breaks: true,
465
466            // Non-default
467            alerts: false,
468            autolink_bare_uris: false,
469            east_asian_line_breaks: false,
470            emoji: false,
471            four_space_rule: false,
472            hard_line_breaks: false,
473            mark: false,
474            mmd_header_identifiers: false,
475            mmd_link_attributes: false,
476
477            // Quarto/Bookdown-specific
478            bookdown_references: false,
479            bookdown_equation_references: false,
480            quarto_callouts: false,
481            quarto_crossrefs: false,
482            quarto_shortcodes: false,
483
484            // Wikilinks (opt-in, no flavor default)
485            wikilinks_title_after_pipe: false,
486            wikilinks_title_before_pipe: false,
487
488            // Spaced reference links (opt-in)
489            spaced_reference_links: false,
490        }
491    }
492
493    fn quarto_defaults() -> Self {
494        let mut ext = Self::pandoc_defaults();
495
496        ext.executable_code = true;
497        ext.rmarkdown_inline_code = true;
498        ext.quarto_inline_code = true;
499        ext.quarto_callouts = true;
500        ext.quarto_crossrefs = true;
501        ext.quarto_shortcodes = true;
502
503        ext
504    }
505
506    fn rmarkdown_defaults() -> Self {
507        let mut ext = Self::pandoc_defaults();
508
509        ext.bookdown_references = true;
510        ext.bookdown_equation_references = true;
511        ext.executable_code = true;
512        ext.rmarkdown_inline_code = true;
513        ext.quarto_inline_code = false;
514        ext.tex_math_dollars = true;
515        ext.tex_math_single_backslash = true;
516
517        ext
518    }
519
520    fn gfm_defaults() -> Self {
521        let mut ext = Self::none_defaults();
522
523        ext.alerts = true;
524        ext.auto_identifiers = true;
525        ext.autolink_bare_uris = true;
526        ext.autolinks = true;
527        ext.backtick_code_blocks = true;
528        ext.emoji = true;
529        ext.fenced_code_blocks = true;
530        ext.footnotes = true;
531        ext.gfm_auto_identifiers = true;
532        ext.inline_images = true;
533        ext.inline_links = true;
534        ext.pipe_tables = true;
535        ext.raw_html = true;
536        ext.reference_links = true;
537        ext.shortcut_reference_links = true;
538        ext.strikeout = true;
539        ext.task_lists = true;
540        ext.tex_math_dollars = true;
541        ext.tex_math_gfm = true;
542        ext.yaml_metadata_block = true;
543
544        ext
545    }
546
547    fn commonmark_defaults() -> Self {
548        let mut ext = Self::none_defaults();
549        // CommonMark's core grammar is what pandoc's commonmark reader treats
550        // as "not extensions" — they're built into the reader. Panache's
551        // parser still gates each construct on its extension flag, so we have
552        // to enable the CommonMark-mandatory ones explicitly here.
553        //
554        // Notably absent: `all_symbols_escapable`. CommonMark only allows
555        // backslash escapes of ASCII punctuation, and panache's
556        // `all_symbols_escapable` flag widens that to any character — so it
557        // must stay off for CommonMark.
558        ext.autolinks = true;
559        ext.backtick_code_blocks = true;
560        ext.escaped_line_breaks = true;
561        ext.fenced_code_blocks = true;
562        ext.inline_images = true;
563        ext.inline_links = true;
564        ext.intraword_underscores = true;
565        ext.raw_html = true;
566        ext.reference_links = true;
567        ext.shortcut_reference_links = true;
568        ext
569    }
570
571    fn multimarkdown_defaults() -> Self {
572        let mut ext = Self::none_defaults();
573
574        ext.all_symbols_escapable = true;
575        ext.auto_identifiers = true;
576        ext.backtick_code_blocks = true;
577        ext.definition_lists = true;
578        ext.footnotes = true;
579        ext.implicit_figures = true;
580        ext.implicit_header_references = true;
581        ext.intraword_underscores = true;
582        ext.mmd_header_identifiers = true;
583        ext.mmd_link_attributes = true;
584        ext.mmd_title_block = true;
585        ext.pipe_tables = true;
586        ext.raw_attribute = true;
587        ext.raw_html = true;
588        ext.reference_links = true;
589        ext.shortcut_reference_links = true;
590        ext.subscript = true;
591        ext.superscript = true;
592        ext.tex_math_dollars = true;
593        ext.tex_math_double_backslash = true;
594
595        ext
596    }
597
598    /// Merge user-specified extension overrides with flavor defaults.
599    ///
600    /// This is used to support partial extension overrides in config files.
601    /// For example, if a user specifies `flavor = "quarto"` and then sets
602    /// `[extensions] quarto-crossrefs = false`, we want all other extensions
603    /// to use Quarto defaults, not Pandoc defaults.
604    ///
605    /// # Arguments
606    /// * `user_overrides` - Map of extension names to their user-specified values
607    /// * `flavor` - The flavor to use for default values
608    ///
609    /// # Returns
610    /// A new Extensions struct with flavor defaults merged with user overrides
611    pub fn merge_with_flavor(user_overrides: HashMap<String, bool>, flavor: Flavor) -> Self {
612        let defaults = Self::for_flavor(flavor);
613        Self::merge_overrides(defaults, user_overrides)
614    }
615
616    /// Apply `user_overrides` on top of an already-resolved `Extensions`.
617    /// Unknown keys are silently ignored (mirrors the panache.toml loader).
618    /// Use this when overriding individual extensions on top of a config that
619    /// has already merged flavor defaults + file-based overrides (e.g. CLI
620    /// `-o extensions.<name>=<bool>`).
621    pub fn apply_overrides(&mut self, user_overrides: HashMap<String, bool>) {
622        *self = Self::merge_overrides(self.clone(), user_overrides);
623    }
624
625    fn merge_overrides(mut base: Extensions, user_overrides: HashMap<String, bool>) -> Self {
626        for (key, value) in user_overrides {
627            base.set_by_name(&key, value);
628        }
629        base
630    }
631}
632
/// Single source of truth tying each user-facing kebab-case extension name
/// (as written under `[extensions]`) to its `Extensions` field.
///
/// The one table passed to this macro produces the public name list, the
/// name lookup and the by-name setter, so a new extension only needs one
/// extra row.
macro_rules! known_extensions {
    ( $( $kebab:literal => $field:ident ),* $(,)? ) => {
        impl Extensions {
            /// Canonical kebab-case names accepted in `[extensions]`, in
            /// table order. Consumed by the config loader's typo check and
            /// by the JSON Schema generator. Snake_case spellings are also
            /// accepted at runtime; see [`Extensions::set_by_name`].
            pub const KNOWN_NAMES: &'static [&'static str] = &[ $($kebab),* ];

            /// Reports whether `name` is a known extension key. Underscores
            /// are treated as hyphens, so both kebab- and snake-case match.
            pub fn is_known_name(name: &str) -> bool {
                let normalized = name.replace('_', "-");
                Self::KNOWN_NAMES.contains(&normalized.as_str())
            }

            /// Assigns `value` to the extension called `name` (kebab- or
            /// snake-case). Returns `true` when a field was updated and
            /// `false` when `name` is not a known extension.
            fn set_by_name(&mut self, name: &str, value: bool) -> bool {
                let normalized = name.replace('_', "-");
                match normalized.as_str() {
                    $( $kebab => self.$field = value, )*
                    _ => return false,
                }
                true
            }
        }
    };
}
664
// Canonical extension table. Each row maps the kebab-case config key to the
// `Extensions` field it controls. Row order is the order of
// `Extensions::KNOWN_NAMES`, so append or insert deliberately.
known_extensions! {
    // Headings
    "blank-before-header" => blank_before_header,
    "header-attributes" => header_attributes,
    "auto-identifiers" => auto_identifiers,
    "gfm-auto-identifiers" => gfm_auto_identifiers,
    "implicit-header-references" => implicit_header_references,
    // Block quotes
    "blank-before-blockquote" => blank_before_blockquote,
    // Lists
    "fancy-lists" => fancy_lists,
    "startnum" => startnum,
    "example-lists" => example_lists,
    "task-lists" => task_lists,
    "definition-lists" => definition_lists,
    "lists-without-preceding-blankline" => lists_without_preceding_blankline,
    "four-space-rule" => four_space_rule,
    // Code blocks and inline code
    "backtick-code-blocks" => backtick_code_blocks,
    "fenced-code-blocks" => fenced_code_blocks,
    "fenced-code-attributes" => fenced_code_attributes,
    "executable-code" => executable_code,
    "rmarkdown-inline-code" => rmarkdown_inline_code,
    "quarto-inline-code" => quarto_inline_code,
    "inline-code-attributes" => inline_code_attributes,
    // Tables
    "simple-tables" => simple_tables,
    "multiline-tables" => multiline_tables,
    "grid-tables" => grid_tables,
    "pipe-tables" => pipe_tables,
    "table-captions" => table_captions,
    // Divs
    "fenced-divs" => fenced_divs,
    "native-divs" => native_divs,
    // Other block elements
    "line-blocks" => line_blocks,
    // Emphasis
    "intraword-underscores" => intraword_underscores,
    "strikeout" => strikeout,
    "superscript" => superscript,
    "subscript" => subscript,
    // Links
    "inline-links" => inline_links,
    "reference-links" => reference_links,
    "shortcut-reference-links" => shortcut_reference_links,
    "link-attributes" => link_attributes,
    "autolinks" => autolinks,
    // Images
    "inline-images" => inline_images,
    "implicit-figures" => implicit_figures,
    // Math
    "tex-math-dollars" => tex_math_dollars,
    "tex-math-gfm" => tex_math_gfm,
    "tex-math-single-backslash" => tex_math_single_backslash,
    "tex-math-double-backslash" => tex_math_double_backslash,
    // Footnotes
    "inline-footnotes" => inline_footnotes,
    "footnotes" => footnotes,
    // Citations
    "citations" => citations,
    // Spans
    "bracketed-spans" => bracketed_spans,
    "native-spans" => native_spans,
    // Metadata
    "yaml-metadata-block" => yaml_metadata_block,
    "pandoc-title-block" => pandoc_title_block,
    "mmd-title-block" => mmd_title_block,
    // Raw content
    "raw-html" => raw_html,
    "markdown-in-html-blocks" => markdown_in_html_blocks,
    "raw-tex" => raw_tex,
    "raw-attribute" => raw_attribute,
    // Escapes and special characters
    "all-symbols-escapable" => all_symbols_escapable,
    "escaped-line-breaks" => escaped_line_breaks,
    // Non-default extensions
    "autolink-bare-uris" => autolink_bare_uris,
    "hard-line-breaks" => hard_line_breaks,
    "east-asian-line-breaks" => east_asian_line_breaks,
    "mmd-header-identifiers" => mmd_header_identifiers,
    "mmd-link-attributes" => mmd_link_attributes,
    "alerts" => alerts,
    "emoji" => emoji,
    "mark" => mark,
    // Quarto / Bookdown
    "quarto-callouts" => quarto_callouts,
    "quarto-crossrefs" => quarto_crossrefs,
    "quarto-shortcodes" => quarto_shortcodes,
    "bookdown-references" => bookdown_references,
    "bookdown-equation-references" => bookdown_equation_references,
    // Wikilinks and spaced reference links (opt-in)
    "wikilinks-title-after-pipe" => wikilinks_title_after_pipe,
    "wikilinks-title-before-pipe" => wikilinks_title_before_pipe,
    "spaced-reference-links" => spaced_reference_links,
}
740
#[cfg(test)]
mod tests {
    use super::{Extensions, Flavor};
    use std::collections::HashMap;

    /// Every flavor, for checks that must hold across all of them.
    const ALL_FLAVORS: [Flavor; 6] = [
        Flavor::Pandoc,
        Flavor::Quarto,
        Flavor::RMarkdown,
        Flavor::Gfm,
        Flavor::CommonMark,
        Flavor::MultiMarkdown,
    ];

    /// Builds an override map holding exactly one `name -> enabled` entry.
    fn single_override(name: &str, enabled: bool) -> HashMap<String, bool> {
        HashMap::from([(name.to_string(), enabled)])
    }

    #[test]
    fn merge_with_flavor_keeps_known_extension_overrides() {
        let merged = Extensions::merge_with_flavor(
            single_override("intraword-underscores", false),
            Flavor::Pandoc,
        );
        assert!(!merged.intraword_underscores);
    }

    #[test]
    fn merge_with_flavor_ignores_unknown_extension_overrides() {
        let unknown: HashMap<String, bool> = ["smart", "smart-quotes"]
            .iter()
            .map(|name| (name.to_string(), true))
            .collect();
        let merged = Extensions::merge_with_flavor(unknown, Flavor::Gfm);
        assert!(merged.strikeout, "known defaults should remain intact");
    }

    #[test]
    fn lists_without_preceding_blankline_defaults_false_for_pandoc_and_gfm() {
        let pandoc = Extensions::for_flavor(Flavor::Pandoc);
        let gfm = Extensions::for_flavor(Flavor::Gfm);
        assert!(!pandoc.lists_without_preceding_blankline);
        assert!(!gfm.lists_without_preceding_blankline);
    }

    #[test]
    fn merge_with_flavor_accepts_lists_without_preceding_blankline_override() {
        let merged = Extensions::merge_with_flavor(
            single_override("lists-without-preceding-blankline", true),
            Flavor::Pandoc,
        );
        assert!(merged.lists_without_preceding_blankline);
    }

    #[test]
    fn four_space_rule_defaults_off_for_every_flavor() {
        for flavor in ALL_FLAVORS {
            let defaults = Extensions::for_flavor(flavor);
            assert!(
                !defaults.four_space_rule,
                "four_space_rule should be off by default for {flavor:?}"
            );
        }
    }

    #[test]
    fn merge_with_flavor_accepts_four_space_rule_override() {
        let merged =
            Extensions::merge_with_flavor(single_override("four-space-rule", true), Flavor::Pandoc);
        assert!(merged.four_space_rule);
    }
}
802
/// Pandoc version whose behavior is matched for ambiguous syntax edge cases.
///
/// The default is [`PandocCompat::V3_9`]. With the `serde` feature enabled,
/// each versioned variant also accepts the dashed and `v`-prefixed spellings
/// listed in its attribute.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum PandocCompat {
    /// Symbolic name for the newest Pandoc behavior Panache supports.
    ///
    /// This does NOT float with upstream releases: it maps to one concrete,
    /// verified version (see [`PandocCompat::PINNED_LATEST`]) that is bumped
    /// by hand.
    #[cfg_attr(feature = "serde", serde(rename = "latest"))]
    Latest,
    /// Follow Pandoc 3.7 for ambiguous syntax edge cases.
    #[cfg_attr(
        feature = "serde",
        serde(rename = "3.7", alias = "3-7", alias = "v3.7", alias = "v3-7")
    )]
    V3_7,
    /// Follow Pandoc 3.9 for ambiguous syntax edge cases.
    #[cfg_attr(
        feature = "serde",
        serde(rename = "3.9", alias = "3-9", alias = "v3.9", alias = "v3-9")
    )]
    #[default]
    V3_9,
}

impl PandocCompat {
    /// Concrete version that [`PandocCompat::Latest`] stands for.
    pub const PINNED_LATEST: Self = Self::V3_9;

    /// Resolves the symbolic `Latest` to [`Self::PINNED_LATEST`]; every
    /// concrete version is returned unchanged.
    pub fn effective(self) -> Self {
        if matches!(self, Self::Latest) {
            Self::PINNED_LATEST
        } else {
            self
        }
    }
}
838
// NOTE: the `///` text on this type and its variants is kept verbatim; the
// schemars derive uses doc comments as the generated schema's description.
/// Parser dialect — the underlying inline tokenization rule set.
///
/// Distinct from [`Flavor`]: `Flavor` is the user-facing identity (Pandoc,
/// Quarto, GFM, etc.) and selects extension defaults; `Dialect` is the
/// structural parser identity. Several flavors share a dialect — Quarto and
/// RMarkdown both use `Pandoc`; CommonMark and GFM both use `CommonMark`.
///
/// Use this for parser branches whose behavior is fundamentally different
/// between dialect families (e.g. unmatched backtick run handling). Per-flavor
/// feature toggles still belong on [`Extensions`].
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
#[cfg_attr(
    feature = "serde",
    derive(serde::Serialize, serde::Deserialize),
    serde(rename_all = "kebab-case")
)]
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
pub enum Dialect {
    /// Pandoc-markdown family. Default for Pandoc, Quarto, RMarkdown,
    /// MultiMarkdown.
    #[default]
    Pandoc,
    /// CommonMark family. Default for CommonMark and GFM.
    CommonMark,
}
861
862impl Dialect {
863    /// Default dialect for a given user-facing flavor.
864    pub fn for_flavor(flavor: Flavor) -> Self {
865        match flavor {
866            Flavor::CommonMark | Flavor::Gfm => Dialect::CommonMark,
867            Flavor::Pandoc | Flavor::Quarto | Flavor::RMarkdown | Flavor::MultiMarkdown => {
868                Dialect::Pandoc
869            }
870        }
871    }
872}
873
874#[derive(Debug, Clone)]
875#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
876#[cfg_attr(feature = "serde", serde(default, rename_all = "kebab-case"))]
877#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
878pub struct ParserOptions {
879    pub flavor: Flavor,
880    pub dialect: Dialect,
881    pub extensions: Extensions,
882    /// Compatibility target for ambiguous Pandoc behavior.
883    pub pandoc_compat: PandocCompat,
884    /// Additional cross-reference key prefixes (beyond the Quarto built-ins
885    /// recognized by [`crate::parser::inlines::citations::is_quarto_crossref_key`])
886    /// that should parse as cross-references rather than citations. Populated
887    /// from the top-level `crossref-prefixes` config key so documents relying on
888    /// crossref-injecting extensions (e.g. pseudocode's `@algo-`) don't have
889    /// those references misclassified as citations.
890    #[cfg_attr(feature = "serde", serde(default, alias = "crossref_prefixes"))]
891    pub crossref_prefixes: Vec<String>,
892    /// Document-level reference link label set, populated by the
893    /// top-level `parse()` function when running CommonMark dialect and
894    /// consulted by inline parsing's bracket resolution pass. `None`
895    /// means "not pre-computed"; the inline pipeline then treats every
896    /// reference-shaped bracket pair conservatively (current behavior),
897    /// which is correct for the Pandoc dialect and a graceful
898    /// degradation for embedded use cases that bypass `parse()`.
899    ///
900    /// Skipped by serde so config files don't try to (de)serialize a
901    /// runtime cache.
902    #[cfg_attr(feature = "serde", serde(skip))]
903    pub refdef_labels: Option<Arc<HashSet<String>>>,
904}
905
906impl Default for ParserOptions {
907    fn default() -> Self {
908        let flavor = Flavor::default();
909        Self {
910            flavor,
911            dialect: Dialect::for_flavor(flavor),
912            extensions: Extensions::for_flavor(flavor),
913            pandoc_compat: PandocCompat::default(),
914            crossref_prefixes: Vec::new(),
915            refdef_labels: None,
916        }
917    }
918}
919
920impl ParserOptions {
921    pub fn effective_pandoc_compat(&self) -> PandocCompat {
922        self.pandoc_compat.effective()
923    }
924}
925
#[cfg(feature = "schema")]
impl schemars::JsonSchema for Flavor {
    /// Name under which this schema is registered.
    fn schema_name() -> std::borrow::Cow<'static, str> {
        std::borrow::Cow::Borrowed("Flavor")
    }

    /// Hand-written string-enum schema for `Flavor`.
    ///
    /// The list carries the serde alias `commonmark` next to the canonical
    /// kebab-case `common-mark`, so the schema accepts every spelling the
    /// deserializer accepts.
    fn json_schema(_: &mut schemars::SchemaGenerator) -> schemars::Schema {
        schemars::json_schema!({
            "type": "string",
            "description": "Markdown flavor to parse and format against.",
            "enum": [
                "pandoc", "quarto", "rmarkdown", "gfm",
                "common-mark", "commonmark",
                "multimarkdown"
            ]
        })
    }
}
951
#[cfg(feature = "schema")]
impl schemars::JsonSchema for PandocCompat {
    /// Name under which this schema is registered.
    fn schema_name() -> std::borrow::Cow<'static, str> {
        std::borrow::Cow::Borrowed("PandocCompat")
    }

    /// Hand-written string-enum schema for `PandocCompat`: `latest` plus four
    /// spellings (`X.Y`, `X-Y`, `vX.Y`, `vX-Y`) for each of 3.7 and 3.9.
    fn json_schema(_: &mut schemars::SchemaGenerator) -> schemars::Schema {
        schemars::json_schema!({
            "type": "string",
            "description": "Compatibility target for ambiguous Pandoc behavior.",
            "enum": [
                "latest",
                "3.7",
                "3-7",
                "v3.7",
                "v3-7",
                "3.9",
                "3-9",
                "v3.9",
                "v3-9"
            ]
        })
    }
}