use std::collections::{HashMap, HashSet};
use std::sync::Arc;
/// Markdown flavor to parse and format against.
///
/// The flavor picks the default extension set (`Extensions::for_flavor`) and
/// the base dialect (`Dialect::for_flavor`). `Pandoc` is the default flavor.
///
/// With the `serde` feature the variants are written as `pandoc`, `quarto`,
/// `rmarkdown`, `gfm`, `common-mark` and `multimarkdown`; `commonmark` is
/// additionally accepted on input as an alias for `common-mark`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "kebab-case"))]
pub enum Flavor {
/// Pandoc Markdown (the default).
#[default]
Pandoc,
/// Quarto: the Pandoc defaults plus the Quarto-specific extensions.
Quarto,
/// R Markdown: the Pandoc defaults plus the bookdown / executable-code extensions.
// Explicit rename: kebab-case would otherwise turn this into `r-markdown`.
#[cfg_attr(feature = "serde", serde(rename = "rmarkdown"))]
RMarkdown,
/// GitHub-Flavored Markdown.
Gfm,
/// CommonMark; serialized as `common-mark`, `commonmark` also accepted.
#[cfg_attr(feature = "serde", serde(alias = "commonmark"))]
CommonMark,
/// MultiMarkdown.
// Explicit rename: kebab-case would otherwise turn this into `multi-markdown`.
#[cfg_attr(feature = "serde", serde(rename = "multimarkdown"))]
MultiMarkdown,
}
// Per-extension on/off switches for the parser, one `bool` per extension.
//
// With the `serde` feature:
// * the canonical key of every field is its kebab-case name (`rename_all`);
// * every multi-word field also accepts its snake_case spelling through an
//   alias, matching the name-based override path (`set_by_name` /
//   `is_known_name`), which treats `_` and `-` as interchangeable;
// * keys that are absent fall back to `Extensions::default()` because of the
//   container-level `serde(default)`.
//
// Field order is kept stable on purpose: it is the serialization order.
//
// NOTE: plain `//` comments are used here rather than `///` because this type
// derives `schemars::JsonSchema`, which copies doc comments into the generated
// schema as descriptions.
#[derive(Debug, Clone, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "serde", serde(default))]
#[cfg_attr(feature = "serde", serde(rename_all = "kebab-case"))]
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
pub struct Extensions {
    // Headings
    #[cfg_attr(feature = "serde", serde(alias = "blank_before_header"))]
    pub blank_before_header: bool,
    #[cfg_attr(feature = "serde", serde(alias = "header_attributes"))]
    pub header_attributes: bool,
    #[cfg_attr(feature = "serde", serde(alias = "auto_identifiers"))]
    pub auto_identifiers: bool,
    #[cfg_attr(feature = "serde", serde(alias = "gfm_auto_identifiers"))]
    pub gfm_auto_identifiers: bool,
    #[cfg_attr(feature = "serde", serde(alias = "implicit_header_references"))]
    pub implicit_header_references: bool,

    // Block quotes
    #[cfg_attr(feature = "serde", serde(alias = "blank_before_blockquote"))]
    pub blank_before_blockquote: bool,

    // Lists
    #[cfg_attr(feature = "serde", serde(alias = "fancy_lists"))]
    pub fancy_lists: bool,
    pub startnum: bool,
    #[cfg_attr(feature = "serde", serde(alias = "example_lists"))]
    pub example_lists: bool,
    #[cfg_attr(feature = "serde", serde(alias = "task_lists"))]
    pub task_lists: bool,
    #[cfg_attr(feature = "serde", serde(alias = "definition_lists"))]
    pub definition_lists: bool,
    #[cfg_attr(feature = "serde", serde(alias = "lists_without_preceding_blankline"))]
    pub lists_without_preceding_blankline: bool,
    #[cfg_attr(feature = "serde", serde(alias = "four_space_rule"))]
    pub four_space_rule: bool,

    // Code
    #[cfg_attr(feature = "serde", serde(alias = "backtick_code_blocks"))]
    pub backtick_code_blocks: bool,
    #[cfg_attr(feature = "serde", serde(alias = "fenced_code_blocks"))]
    pub fenced_code_blocks: bool,
    #[cfg_attr(feature = "serde", serde(alias = "fenced_code_attributes"))]
    pub fenced_code_attributes: bool,
    #[cfg_attr(feature = "serde", serde(alias = "executable_code"))]
    pub executable_code: bool,
    #[cfg_attr(feature = "serde", serde(alias = "rmarkdown_inline_code"))]
    pub rmarkdown_inline_code: bool,
    #[cfg_attr(feature = "serde", serde(alias = "quarto_inline_code"))]
    pub quarto_inline_code: bool,
    #[cfg_attr(feature = "serde", serde(alias = "inline_code_attributes"))]
    pub inline_code_attributes: bool,

    // Tables
    #[cfg_attr(feature = "serde", serde(alias = "simple_tables"))]
    pub simple_tables: bool,
    #[cfg_attr(feature = "serde", serde(alias = "multiline_tables"))]
    pub multiline_tables: bool,
    #[cfg_attr(feature = "serde", serde(alias = "grid_tables"))]
    pub grid_tables: bool,
    #[cfg_attr(feature = "serde", serde(alias = "pipe_tables"))]
    pub pipe_tables: bool,
    #[cfg_attr(feature = "serde", serde(alias = "table_captions"))]
    pub table_captions: bool,

    // Divs and line blocks
    #[cfg_attr(feature = "serde", serde(alias = "fenced_divs"))]
    pub fenced_divs: bool,
    #[cfg_attr(feature = "serde", serde(alias = "native_divs"))]
    pub native_divs: bool,
    #[cfg_attr(feature = "serde", serde(alias = "line_blocks"))]
    pub line_blocks: bool,

    // Inline formatting
    #[cfg_attr(feature = "serde", serde(alias = "intraword_underscores"))]
    pub intraword_underscores: bool,
    pub strikeout: bool,
    pub superscript: bool,
    pub subscript: bool,

    // Links and images
    #[cfg_attr(feature = "serde", serde(alias = "inline_links"))]
    pub inline_links: bool,
    #[cfg_attr(feature = "serde", serde(alias = "reference_links"))]
    pub reference_links: bool,
    #[cfg_attr(feature = "serde", serde(alias = "shortcut_reference_links"))]
    pub shortcut_reference_links: bool,
    #[cfg_attr(feature = "serde", serde(alias = "link_attributes"))]
    pub link_attributes: bool,
    pub autolinks: bool,
    #[cfg_attr(feature = "serde", serde(alias = "inline_images"))]
    pub inline_images: bool,
    #[cfg_attr(feature = "serde", serde(alias = "implicit_figures"))]
    pub implicit_figures: bool,

    // Math
    #[cfg_attr(feature = "serde", serde(alias = "tex_math_dollars"))]
    pub tex_math_dollars: bool,
    #[cfg_attr(feature = "serde", serde(alias = "tex_math_gfm"))]
    pub tex_math_gfm: bool,
    #[cfg_attr(feature = "serde", serde(alias = "tex_math_single_backslash"))]
    pub tex_math_single_backslash: bool,
    #[cfg_attr(feature = "serde", serde(alias = "tex_math_double_backslash"))]
    pub tex_math_double_backslash: bool,

    // Footnotes and citations
    #[cfg_attr(feature = "serde", serde(alias = "inline_footnotes"))]
    pub inline_footnotes: bool,
    pub footnotes: bool,
    pub citations: bool,

    // Spans
    #[cfg_attr(feature = "serde", serde(alias = "bracketed_spans"))]
    pub bracketed_spans: bool,
    #[cfg_attr(feature = "serde", serde(alias = "native_spans"))]
    pub native_spans: bool,

    // Metadata blocks
    #[cfg_attr(feature = "serde", serde(alias = "yaml_metadata_block"))]
    pub yaml_metadata_block: bool,
    #[cfg_attr(feature = "serde", serde(alias = "pandoc_title_block"))]
    pub pandoc_title_block: bool,
    #[cfg_attr(feature = "serde", serde(alias = "mmd_title_block"))]
    pub mmd_title_block: bool,

    // Raw content
    #[cfg_attr(feature = "serde", serde(alias = "raw_html"))]
    pub raw_html: bool,
    #[cfg_attr(feature = "serde", serde(alias = "markdown_in_html_blocks"))]
    pub markdown_in_html_blocks: bool,
    #[cfg_attr(feature = "serde", serde(alias = "raw_tex"))]
    pub raw_tex: bool,
    #[cfg_attr(feature = "serde", serde(alias = "raw_attribute"))]
    pub raw_attribute: bool,

    // Escapes and line breaks
    #[cfg_attr(feature = "serde", serde(alias = "all_symbols_escapable"))]
    pub all_symbols_escapable: bool,
    #[cfg_attr(feature = "serde", serde(alias = "escaped_line_breaks"))]
    pub escaped_line_breaks: bool,
    #[cfg_attr(feature = "serde", serde(alias = "autolink_bare_uris"))]
    pub autolink_bare_uris: bool,
    #[cfg_attr(feature = "serde", serde(alias = "hard_line_breaks"))]
    pub hard_line_breaks: bool,
    #[cfg_attr(feature = "serde", serde(alias = "east_asian_line_breaks"))]
    pub east_asian_line_breaks: bool,

    // Extensions that are off in the Pandoc defaults
    #[cfg_attr(feature = "serde", serde(alias = "mmd_header_identifiers"))]
    pub mmd_header_identifiers: bool,
    #[cfg_attr(feature = "serde", serde(alias = "mmd_link_attributes"))]
    pub mmd_link_attributes: bool,
    pub alerts: bool,
    pub emoji: bool,
    pub mark: bool,
    #[cfg_attr(feature = "serde", serde(alias = "quarto_callouts"))]
    pub quarto_callouts: bool,
    #[cfg_attr(feature = "serde", serde(alias = "quarto_crossrefs"))]
    pub quarto_crossrefs: bool,
    #[cfg_attr(feature = "serde", serde(alias = "quarto_shortcodes"))]
    pub quarto_shortcodes: bool,
    #[cfg_attr(feature = "serde", serde(alias = "bookdown_references"))]
    pub bookdown_references: bool,
    #[cfg_attr(feature = "serde", serde(alias = "bookdown_equation_references"))]
    pub bookdown_equation_references: bool,
}
impl Default for Extensions {
fn default() -> Self {
Self::for_flavor(Flavor::default())
}
}
impl Extensions {
    /// Returns a set with every extension switched off.
    ///
    /// Written as an exhaustive struct literal (no `..`) so that adding a
    /// field to `Extensions` does not compile until it gets a value here.
    fn none_defaults() -> Self {
        Self {
            alerts: false,
            all_symbols_escapable: false,
            auto_identifiers: false,
            autolink_bare_uris: false,
            autolinks: false,
            backtick_code_blocks: false,
            blank_before_blockquote: false,
            blank_before_header: false,
            bookdown_references: false,
            bookdown_equation_references: false,
            bracketed_spans: false,
            citations: false,
            definition_lists: false,
            lists_without_preceding_blankline: false,
            emoji: false,
            escaped_line_breaks: false,
            example_lists: false,
            executable_code: false,
            rmarkdown_inline_code: false,
            quarto_inline_code: false,
            fancy_lists: false,
            fenced_code_attributes: false,
            fenced_code_blocks: false,
            fenced_divs: false,
            footnotes: false,
            four_space_rule: false,
            gfm_auto_identifiers: false,
            grid_tables: false,
            east_asian_line_breaks: false,
            hard_line_breaks: false,
            header_attributes: false,
            implicit_figures: false,
            implicit_header_references: false,
            inline_code_attributes: false,
            inline_footnotes: false,
            inline_images: false,
            inline_links: false,
            intraword_underscores: false,
            line_blocks: false,
            link_attributes: false,
            mark: false,
            markdown_in_html_blocks: false,
            mmd_header_identifiers: false,
            mmd_link_attributes: false,
            mmd_title_block: false,
            multiline_tables: false,
            native_divs: false,
            native_spans: false,
            pandoc_title_block: false,
            pipe_tables: false,
            quarto_callouts: false,
            quarto_crossrefs: false,
            quarto_shortcodes: false,
            raw_attribute: false,
            raw_html: false,
            raw_tex: false,
            reference_links: false,
            shortcut_reference_links: false,
            simple_tables: false,
            startnum: false,
            strikeout: false,
            subscript: false,
            superscript: false,
            table_captions: false,
            task_lists: false,
            tex_math_dollars: false,
            tex_math_double_backslash: false,
            tex_math_gfm: false,
            tex_math_single_backslash: false,
            yaml_metadata_block: false,
        }
    }

    /// Returns the default extension set for `flavor`.
    ///
    /// The match is exhaustive, so a new `Flavor` variant must be given its
    /// own defaults before the crate compiles again.
    pub fn for_flavor(flavor: Flavor) -> Self {
        match flavor {
            Flavor::Pandoc => Self::pandoc_defaults(),
            Flavor::Quarto => Self::quarto_defaults(),
            Flavor::RMarkdown => Self::rmarkdown_defaults(),
            Flavor::Gfm => Self::gfm_defaults(),
            Flavor::CommonMark => Self::commonmark_defaults(),
            Flavor::MultiMarkdown => Self::multimarkdown_defaults(),
        }
    }

    /// Defaults for [`Flavor::Pandoc`]; also the base for Quarto and R Markdown.
    ///
    /// Exhaustive on purpose, for the same reason as `none_defaults`.
    fn pandoc_defaults() -> Self {
        Self {
            // Headings and block quotes
            auto_identifiers: true,
            blank_before_blockquote: true,
            blank_before_header: true,
            gfm_auto_identifiers: false,
            header_attributes: true,
            implicit_header_references: true,
            // Lists
            definition_lists: true,
            example_lists: true,
            fancy_lists: true,
            lists_without_preceding_blankline: false,
            startnum: true,
            task_lists: true,
            // Code
            backtick_code_blocks: true,
            executable_code: false,
            rmarkdown_inline_code: false,
            quarto_inline_code: false,
            fenced_code_attributes: true,
            fenced_code_blocks: true,
            inline_code_attributes: true,
            // Tables
            grid_tables: true,
            multiline_tables: true,
            pipe_tables: true,
            simple_tables: true,
            table_captions: true,
            // Divs and line blocks
            fenced_divs: true,
            native_divs: true,
            line_blocks: true,
            // Inline formatting
            intraword_underscores: true,
            strikeout: true,
            subscript: true,
            superscript: true,
            // Links and images
            autolinks: true,
            inline_links: true,
            link_attributes: true,
            reference_links: true,
            shortcut_reference_links: true,
            implicit_figures: true,
            inline_images: true,
            // Math
            tex_math_dollars: true,
            tex_math_double_backslash: false,
            tex_math_gfm: false,
            tex_math_single_backslash: false,
            // Footnotes, citations, spans
            footnotes: true,
            inline_footnotes: true,
            citations: true,
            bracketed_spans: true,
            native_spans: true,
            // Metadata blocks
            mmd_title_block: false,
            pandoc_title_block: true,
            yaml_metadata_block: true,
            // Raw content
            markdown_in_html_blocks: false,
            raw_attribute: true,
            raw_html: true,
            raw_tex: true,
            // Escapes
            all_symbols_escapable: true,
            escaped_line_breaks: true,
            // Everything below is off for plain Pandoc
            alerts: false,
            autolink_bare_uris: false,
            east_asian_line_breaks: false,
            emoji: false,
            four_space_rule: false,
            hard_line_breaks: false,
            mark: false,
            mmd_header_identifiers: false,
            mmd_link_attributes: false,
            bookdown_references: false,
            bookdown_equation_references: false,
            quarto_callouts: false,
            quarto_crossrefs: false,
            quarto_shortcodes: false,
        }
    }

    /// Defaults for [`Flavor::Quarto`]: the Pandoc set plus executable code,
    /// both inline-code variants and the Quarto-specific extensions.
    fn quarto_defaults() -> Self {
        let mut ext = Self::pandoc_defaults();
        ext.executable_code = true;
        ext.rmarkdown_inline_code = true;
        ext.quarto_inline_code = true;
        ext.quarto_callouts = true;
        ext.quarto_crossrefs = true;
        ext.quarto_shortcodes = true;
        ext
    }

    /// Defaults for [`Flavor::RMarkdown`]: the Pandoc set plus bookdown
    /// references, executable code, R Markdown inline code and
    /// single-backslash math.
    fn rmarkdown_defaults() -> Self {
        let mut ext = Self::pandoc_defaults();
        ext.bookdown_references = true;
        ext.bookdown_equation_references = true;
        ext.executable_code = true;
        ext.rmarkdown_inline_code = true;
        // Same value as the Pandoc base; restated so the R Markdown choice is explicit.
        ext.quarto_inline_code = false;
        // Same value as the Pandoc base; restated so the R Markdown choice is explicit.
        ext.tex_math_dollars = true;
        ext.tex_math_single_backslash = true;
        ext
    }

    /// Defaults for [`Flavor::Gfm`], built up from the all-off set.
    fn gfm_defaults() -> Self {
        let mut ext = Self::none_defaults();
        ext.alerts = true;
        ext.auto_identifiers = true;
        ext.autolink_bare_uris = true;
        ext.autolinks = true;
        ext.backtick_code_blocks = true;
        ext.emoji = true;
        ext.fenced_code_blocks = true;
        ext.footnotes = true;
        ext.gfm_auto_identifiers = true;
        ext.inline_images = true;
        ext.inline_links = true;
        ext.pipe_tables = true;
        ext.raw_html = true;
        ext.reference_links = true;
        ext.shortcut_reference_links = true;
        ext.strikeout = true;
        ext.task_lists = true;
        ext.tex_math_dollars = true;
        ext.tex_math_gfm = true;
        ext.yaml_metadata_block = true;
        ext
    }

    /// Defaults for [`Flavor::CommonMark`], built up from the all-off set.
    fn commonmark_defaults() -> Self {
        let mut ext = Self::none_defaults();
        ext.autolinks = true;
        ext.backtick_code_blocks = true;
        ext.escaped_line_breaks = true;
        ext.fenced_code_blocks = true;
        ext.inline_images = true;
        ext.inline_links = true;
        ext.intraword_underscores = true;
        ext.raw_html = true;
        ext.reference_links = true;
        ext.shortcut_reference_links = true;
        ext
    }

    /// Defaults for [`Flavor::MultiMarkdown`], built up from the all-off set.
    fn multimarkdown_defaults() -> Self {
        let mut ext = Self::none_defaults();
        ext.all_symbols_escapable = true;
        ext.auto_identifiers = true;
        ext.backtick_code_blocks = true;
        ext.definition_lists = true;
        ext.footnotes = true;
        ext.implicit_figures = true;
        ext.implicit_header_references = true;
        ext.intraword_underscores = true;
        ext.mmd_header_identifiers = true;
        ext.mmd_link_attributes = true;
        ext.mmd_title_block = true;
        ext.pipe_tables = true;
        ext.raw_attribute = true;
        ext.raw_html = true;
        ext.reference_links = true;
        ext.shortcut_reference_links = true;
        ext.subscript = true;
        ext.superscript = true;
        ext.tex_math_dollars = true;
        ext.tex_math_double_backslash = true;
        ext
    }

    /// Builds the defaults for `flavor` and applies `user_overrides` on top.
    ///
    /// See [`Extensions::apply_overrides`] for how override names are
    /// interpreted and ordered.
    pub fn merge_with_flavor(user_overrides: HashMap<String, bool>, flavor: Flavor) -> Self {
        let defaults = Self::for_flavor(flavor);
        Self::merge_overrides(defaults, user_overrides)
    }

    /// Applies `user_overrides` to `self` in place.
    ///
    /// Names may use `-` or `_` between words; names that are not known
    /// extensions are ignored. Because both spellings of one extension can be
    /// present in the same map, the entries are applied in a fixed order
    /// instead of `HashMap` iteration order: spellings containing `_` first
    /// (sorted), canonical kebab-case spellings last. The result is therefore
    /// the same on every run, and the canonical spelling wins on a conflict.
    pub fn apply_overrides(&mut self, user_overrides: HashMap<String, bool>) {
        let mut entries: Vec<(String, bool)> = user_overrides.into_iter().collect();
        entries.sort_unstable_by(|(left, _), (right, _)| {
            // `false < true`, so underscore spellings sort before canonical ones.
            let left_key = (!left.contains('_'), left);
            let right_key = (!right.contains('_'), right);
            left_key.cmp(&right_key)
        });
        for (name, enabled) in entries {
            // The "was this name known?" result is dropped on purpose:
            // unknown names are skipped rather than reported.
            self.set_by_name(&name, enabled);
        }
    }

    /// By-value form of [`Extensions::apply_overrides`].
    fn merge_overrides(mut base: Extensions, user_overrides: HashMap<String, bool>) -> Self {
        base.apply_overrides(user_overrides);
        base
    }
}
// Generates the name-based lookup API of `Extensions` from one table of
// `"kebab-case-name" => field` pairs, so the list of known names and the
// name-to-field dispatch cannot drift apart.
macro_rules! known_extensions {
    ( $( $ext_name:literal => $member:ident ),* $(,)? ) => {
        impl Extensions {
            /// Canonical (kebab-case) names of all known extensions, in table order.
            pub const KNOWN_NAMES: &'static [&'static str] = &[ $($ext_name),* ];

            /// Returns `true` when `name` is a known extension.
            ///
            /// Underscores are treated as hyphens before the comparison.
            pub fn is_known_name(name: &str) -> bool {
                let canonical = name.replace('_', "-");
                Self::KNOWN_NAMES.contains(&canonical.as_str())
            }

            /// Sets the extension called `name` to `value`.
            ///
            /// Underscores are treated as hyphens. Returns `true` when the
            /// name was recognised and `false` (leaving `self` untouched)
            /// otherwise.
            fn set_by_name(&mut self, name: &str, value: bool) -> bool {
                let canonical = name.replace('_', "-");
                match canonical.as_str() {
                    $(
                        $ext_name => {
                            self.$member = value;
                            true
                        }
                    )*
                    _ => false,
                }
            }
        }
    };
}
// The single table behind `Extensions::KNOWN_NAMES`, `is_known_name` and
// `set_by_name`: canonical kebab-case name on the left, struct field on the
// right. The order of the rows is the order of `KNOWN_NAMES`.
// A new `Extensions` field needs a row here to become settable by name.
known_extensions! {
"blank-before-header" => blank_before_header,
"header-attributes" => header_attributes,
"auto-identifiers" => auto_identifiers,
"gfm-auto-identifiers" => gfm_auto_identifiers,
"implicit-header-references" => implicit_header_references,
"blank-before-blockquote" => blank_before_blockquote,
"fancy-lists" => fancy_lists,
"startnum" => startnum,
"example-lists" => example_lists,
"task-lists" => task_lists,
"definition-lists" => definition_lists,
"lists-without-preceding-blankline" => lists_without_preceding_blankline,
"four-space-rule" => four_space_rule,
"backtick-code-blocks" => backtick_code_blocks,
"fenced-code-blocks" => fenced_code_blocks,
"fenced-code-attributes" => fenced_code_attributes,
"executable-code" => executable_code,
"rmarkdown-inline-code" => rmarkdown_inline_code,
"quarto-inline-code" => quarto_inline_code,
"inline-code-attributes" => inline_code_attributes,
"simple-tables" => simple_tables,
"multiline-tables" => multiline_tables,
"grid-tables" => grid_tables,
"pipe-tables" => pipe_tables,
"table-captions" => table_captions,
"fenced-divs" => fenced_divs,
"native-divs" => native_divs,
"line-blocks" => line_blocks,
"intraword-underscores" => intraword_underscores,
"strikeout" => strikeout,
"superscript" => superscript,
"subscript" => subscript,
"inline-links" => inline_links,
"reference-links" => reference_links,
"shortcut-reference-links" => shortcut_reference_links,
"link-attributes" => link_attributes,
"autolinks" => autolinks,
"inline-images" => inline_images,
"implicit-figures" => implicit_figures,
"tex-math-dollars" => tex_math_dollars,
"tex-math-gfm" => tex_math_gfm,
"tex-math-single-backslash" => tex_math_single_backslash,
"tex-math-double-backslash" => tex_math_double_backslash,
"inline-footnotes" => inline_footnotes,
"footnotes" => footnotes,
"citations" => citations,
"bracketed-spans" => bracketed_spans,
"native-spans" => native_spans,
"yaml-metadata-block" => yaml_metadata_block,
"pandoc-title-block" => pandoc_title_block,
"mmd-title-block" => mmd_title_block,
"raw-html" => raw_html,
"markdown-in-html-blocks" => markdown_in_html_blocks,
"raw-tex" => raw_tex,
"raw-attribute" => raw_attribute,
"all-symbols-escapable" => all_symbols_escapable,
"escaped-line-breaks" => escaped_line_breaks,
"autolink-bare-uris" => autolink_bare_uris,
"hard-line-breaks" => hard_line_breaks,
"east-asian-line-breaks" => east_asian_line_breaks,
"mmd-header-identifiers" => mmd_header_identifiers,
"mmd-link-attributes" => mmd_link_attributes,
"alerts" => alerts,
"emoji" => emoji,
"mark" => mark,
"quarto-callouts" => quarto_callouts,
"quarto-crossrefs" => quarto_crossrefs,
"quarto-shortcodes" => quarto_shortcodes,
"bookdown-references" => bookdown_references,
"bookdown-equation-references" => bookdown_equation_references,
}
#[cfg(test)]
mod tests {
    use super::{Extensions, Flavor};
    use std::collections::HashMap;

    /// Builds an override map from `(name, value)` pairs.
    fn overrides_of(pairs: &[(&str, bool)]) -> HashMap<String, bool> {
        pairs
            .iter()
            .map(|&(name, value)| (name.to_string(), value))
            .collect()
    }

    /// A known name given in canonical form overrides the flavor default.
    #[test]
    fn merge_with_flavor_keeps_known_extension_overrides() {
        let merged = Extensions::merge_with_flavor(
            overrides_of(&[("intraword-underscores", false)]),
            Flavor::Pandoc,
        );
        assert!(!merged.intraword_underscores);
    }

    /// Names that are not extensions neither fail nor disturb the defaults.
    #[test]
    fn merge_with_flavor_ignores_unknown_extension_overrides() {
        let merged = Extensions::merge_with_flavor(
            overrides_of(&[("smart", true), ("smart-quotes", true)]),
            Flavor::Gfm,
        );
        assert!(merged.strikeout, "known defaults should remain intact");
    }

    #[test]
    fn lists_without_preceding_blankline_defaults_false_for_pandoc_and_gfm() {
        let pandoc = Extensions::for_flavor(Flavor::Pandoc);
        let gfm = Extensions::for_flavor(Flavor::Gfm);
        assert!(!pandoc.lists_without_preceding_blankline);
        assert!(!gfm.lists_without_preceding_blankline);
    }

    #[test]
    fn merge_with_flavor_accepts_lists_without_preceding_blankline_override() {
        let merged = Extensions::merge_with_flavor(
            overrides_of(&[("lists-without-preceding-blankline", true)]),
            Flavor::Pandoc,
        );
        assert!(merged.lists_without_preceding_blankline);
    }

    #[test]
    fn four_space_rule_defaults_off_for_every_flavor() {
        const ALL_FLAVORS: [Flavor; 6] = [
            Flavor::Pandoc,
            Flavor::Quarto,
            Flavor::RMarkdown,
            Flavor::Gfm,
            Flavor::CommonMark,
            Flavor::MultiMarkdown,
        ];
        for flavor in ALL_FLAVORS {
            let defaults = Extensions::for_flavor(flavor);
            assert!(
                !defaults.four_space_rule,
                "four_space_rule should be off by default for {flavor:?}"
            );
        }
    }

    #[test]
    fn merge_with_flavor_accepts_four_space_rule_override() {
        let merged = Extensions::merge_with_flavor(
            overrides_of(&[("four-space-rule", true)]),
            Flavor::Pandoc,
        );
        assert!(merged.four_space_rule);
    }
}
/// Compatibility target for ambiguous Pandoc behavior.
///
/// The default is `V3_9`. `Latest` is a moving alias; `PandocCompat::effective`
/// resolves it to `PandocCompat::PINNED_LATEST`.
///
/// With the `serde` feature each version is written as `"3.7"` / `"3.9"` and
/// is also accepted as `"3-7"`, `"v3.7"`, `"v3-7"` (and likewise for 3.9).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum PandocCompat {
/// Whatever version the crate currently pins as "latest".
#[cfg_attr(feature = "serde", serde(rename = "latest"))]
Latest,
/// Pandoc 3.7.
#[cfg_attr(
feature = "serde",
serde(rename = "3.7", alias = "3-7", alias = "v3.7", alias = "v3-7")
)]
V3_7,
/// Pandoc 3.9 (the default).
#[default]
#[cfg_attr(
feature = "serde",
serde(rename = "3.9", alias = "3-9", alias = "v3.9", alias = "v3-9")
)]
V3_9,
}
impl PandocCompat {
    /// The concrete version that [`PandocCompat::Latest`] currently stands for.
    pub const PINNED_LATEST: Self = Self::V3_9;

    /// Resolves the `Latest` alias to [`Self::PINNED_LATEST`]; concrete
    /// versions are returned unchanged.
    pub fn effective(self) -> Self {
        if matches!(self, Self::Latest) {
            Self::PINNED_LATEST
        } else {
            self
        }
    }
}
// Base parsing dialect. `Dialect::for_flavor` maps each `Flavor` onto one of
// these: CommonMark and Gfm use `CommonMark`, every other flavor uses `Pandoc`.
// `Pandoc` is the default. With the `serde` feature the variants are written
// in kebab-case (`rename_all`).
//
// Plain `//` comments on purpose: this type derives `schemars::JsonSchema`,
// which copies `///` docs into the generated schema.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "kebab-case"))]
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
pub enum Dialect {
// Pandoc-style parsing (the default).
#[default]
Pandoc,
// CommonMark-style parsing.
CommonMark,
}
impl Dialect {
    /// Returns the base dialect that `flavor` is parsed with.
    ///
    /// Every flavor is listed explicitly, so adding a `Flavor` variant forces
    /// a decision here at compile time.
    pub fn for_flavor(flavor: Flavor) -> Self {
        match flavor {
            Flavor::Pandoc => Self::Pandoc,
            Flavor::Quarto => Self::Pandoc,
            Flavor::RMarkdown => Self::Pandoc,
            Flavor::MultiMarkdown => Self::Pandoc,
            Flavor::Gfm => Self::CommonMark,
            Flavor::CommonMark => Self::CommonMark,
        }
    }
}
// Everything the parser needs to know about how to read a document.
//
// With the `serde` feature, keys are kebab-case and any missing key is filled
// from `ParserOptions::default()` (container-level `serde(default)`).
// Field order is the serialization order.
//
// Plain `//` comments on purpose: this type derives `schemars::JsonSchema`,
// which copies `///` docs into the generated schema.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "serde", serde(default, rename_all = "kebab-case"))]
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
pub struct ParserOptions {
// Selected Markdown flavor.
pub flavor: Flavor,
// Base dialect; `Default` derives it from the flavor via `Dialect::for_flavor`.
pub dialect: Dialect,
// Extension switches; `Default` derives them from the flavor via `Extensions::for_flavor`.
pub extensions: Extensions,
// Pandoc compatibility target; read it through `effective_pandoc_compat`
// to have `Latest` resolved to a concrete version.
pub pandoc_compat: PandocCompat,
// Never (de)serialized; `None` by default. Shared, read-only set of strings.
// NOTE(review): presumably the labels of known reference definitions — confirm against the caller that fills it.
#[cfg_attr(feature = "serde", serde(skip))]
pub refdef_labels: Option<Arc<HashSet<String>>>,
}
/// Default options: the default flavor, with dialect and extensions derived
/// from it, the default Pandoc compatibility target and no reference labels.
impl Default for ParserOptions {
    fn default() -> Self {
        let default_flavor = Flavor::default();
        let dialect = Dialect::for_flavor(default_flavor);
        let extensions = Extensions::for_flavor(default_flavor);
        let pandoc_compat = PandocCompat::default();
        Self {
            flavor: default_flavor,
            dialect,
            extensions,
            pandoc_compat,
            refdef_labels: None,
        }
    }
}
impl ParserOptions {
pub fn effective_pandoc_compat(&self) -> PandocCompat {
self.pandoc_compat.effective()
}
}
// Hand-written schema for `Flavor`: a plain string enum listing every
// spelling accepted on input, including the `commonmark` alias.
#[cfg(feature = "schema")]
impl schemars::JsonSchema for Flavor {
    fn schema_name() -> std::borrow::Cow<'static, str> {
        std::borrow::Cow::Borrowed("Flavor")
    }

    fn json_schema(_: &mut schemars::SchemaGenerator) -> schemars::Schema {
        schemars::json_schema!({
            "type": "string",
            "description": "Markdown flavor to parse and format against.",
            "enum": [
                "pandoc",
                "quarto",
                "rmarkdown",
                "gfm",
                "common-mark",
                "commonmark",
                "multimarkdown"
            ]
        })
    }
}
// Hand-written schema for `PandocCompat`: a plain string enum listing the
// canonical spelling of every variant together with all accepted aliases.
#[cfg(feature = "schema")]
impl schemars::JsonSchema for PandocCompat {
    fn schema_name() -> std::borrow::Cow<'static, str> {
        std::borrow::Cow::Borrowed("PandocCompat")
    }

    fn json_schema(_: &mut schemars::SchemaGenerator) -> schemars::Schema {
        schemars::json_schema!({
            "type": "string",
            "description": "Compatibility target for ambiguous Pandoc behavior.",
            "enum": [
                "latest",
                "3.7",
                "3-7",
                "v3.7",
                "v3-7",
                "3.9",
                "3-9",
                "v3.9",
                "v3-9"
            ]
        })
    }
}