Skip to main content

mdwright_document/
lib.rs

1#![forbid(unsafe_code)]
2
3mod document;
4mod error;
5mod format_facts;
6mod gfm;
7mod heading;
8mod ir;
9mod line_index;
10mod parse;
11mod refs;
12mod render;
13mod signature;
14mod source;
15mod tree;
16mod util;
17
18pub use document::{Document, render_html, render_html_with_options, render_html_with_render_options};
19pub use error::ParseError;
20pub use format_facts::{
21    HeadingAttrSite, InlineDelimiterKind, InlineDelimiterSlot, InlineLinkDestinationSlot, OrderedListMarkerSite,
22    ParagraphHardBreak, ReferenceDefinitionSite, StructuralKind, StructuralSpan, TableCellSite, TableRowSite,
23    TableSite, UnorderedListMarkerSite, WrappableParagraph,
24};
25pub use gfm::{AutolinkFact, AutolinkOrigin};
26pub use heading::HeadingAttrs;
27pub use ir::{
28    AllowScope, BlockCheckpointFact, CodeBlock, Frontmatter, FrontmatterDelimiter, Heading, HtmlBlock, InlineCode,
29    InlineHtml, LinkDef, ListGroup, ListItem, Suppression, SuppressionKind, TextSlice,
30};
31pub use line_index::{LineIndex, LineIndexError};
32pub use mdwright_math::{MathError, MathRegion, MathSpan};
33pub use render::{RenderOptions, RenderProfile};
34pub use signature::{MarkdownSignature, markdown_signature};
35pub use tree::TableAlign;
36
37/// Markdown recognition policy.
38#[derive(Copy, Clone, Debug, PartialEq, Eq, Default)]
39pub struct ParseOptions {
40    extensions: ExtensionOptions,
41    math: MathParseOptions,
42}
43
44impl ParseOptions {
45    /// Extension-recognition toggles.
46    #[must_use]
47    pub fn extensions(&self) -> ExtensionOptions {
48        self.extensions
49    }
50
51    /// Math-source recognition policy.
52    #[must_use]
53    pub fn math(&self) -> MathParseOptions {
54        self.math
55    }
56
57    /// Override extension-recognition toggles.
58    #[must_use]
59    pub fn with_extensions(mut self, extensions: ExtensionOptions) -> Self {
60        self.extensions = extensions;
61        self
62    }
63
64    /// Override math-source recognition policy.
65    #[must_use]
66    pub fn with_math(mut self, math: MathParseOptions) -> Self {
67        self.math = math;
68        self
69    }
70}
71
72/// Math delimiter recognition policy.
73#[derive(Copy, Clone, Debug, PartialEq, Eq)]
74pub struct MathParseOptions {
75    pub delimiters: MathDelimiterSet,
76}
77
78impl Default for MathParseOptions {
79    fn default() -> Self {
80        Self {
81            delimiters: MathDelimiterSet::Tex,
82        }
83    }
84}
85
86impl MathParseOptions {
87    pub(crate) fn scanner_config(self) -> mdwright_math::MathConfig {
88        let mut cfg = mdwright_math::MathConfig::default();
89        match self.delimiters {
90            MathDelimiterSet::Tex => {}
91            MathDelimiterSet::Github => {
92                cfg.double_dollar = true;
93                cfg.single_dollar = true;
94            }
95        }
96        cfg
97    }
98}
99
/// Named sets of math delimiters the Markdown parser can recognise.
///
/// [`MathDelimiterSet::Tex`] is the default.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum MathDelimiterSet {
    /// TeX delimiters only: `\(...\)`, `\[...\]`, and LaTeX environments.
    #[default]
    Tex,
    /// The TeX delimiters plus GitHub-style dollar math.
    Github,
}
109
110/// Per-extension recognition toggles.
111#[derive(Copy, Clone, Debug, PartialEq, Eq)]
112#[allow(
113    clippy::struct_excessive_bools,
114    reason = "one toggle per mdformat-mkdocs extension; the parallel naming with the TOML schema is intentional"
115)]
116pub struct ExtensionOptions {
117    pub gfm: GfmOptions,
118    pub definition_lists: bool,
119    pub abbreviation_lists: bool,
120    pub heading_attribute_lists: bool,
121    pub block_attribute_lists: bool,
122    pub myst: MystOptions,
123    pub pandoc: PandocOptions,
124}
125
126impl Default for ExtensionOptions {
127    fn default() -> Self {
128        Self {
129            gfm: GfmOptions::default(),
130            definition_lists: true,
131            abbreviation_lists: true,
132            heading_attribute_lists: true,
133            block_attribute_lists: true,
134            myst: MystOptions::default(),
135            pandoc: PandocOptions::default(),
136        }
137    }
138}
139
140/// Recognition toggles for GitHub Flavored Markdown extensions.
141#[derive(Copy, Clone, Debug, PartialEq, Eq)]
142pub struct GfmOptions {
143    pub autolinks: GfmAutolinkPolicy,
144    pub tagfilter: bool,
145}
146
147impl Default for GfmOptions {
148    fn default() -> Self {
149        Self {
150            autolinks: GfmAutolinkPolicy::UrlsAndEmails,
151            tagfilter: true,
152        }
153    }
154}
155
/// Recognition policy for GFM extended autolinks.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum GfmAutolinkPolicy {
    /// Extended autolink recognition is switched off.
    Disabled,
    /// URL autolinks are recognised.
    Urls,
    /// URL and email autolinks are recognised.
    UrlsAndEmails,
}
163
/// Recognition toggles for the `MyST`-flavoured extensions.
///
/// All toggles are enabled by default.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
#[allow(
    clippy::struct_excessive_bools,
    reason = "one toggle per MyST construct; recognition gates are independent"
)]
pub struct MystOptions {
    /// Recognise `MyST` directive containers.
    pub directive_containers: bool,
    /// Recognise `MyST` inline roles.
    pub inline_roles: bool,
    /// Recognise `MyST` substitution references.
    pub substitution_references: bool,
    /// Recognise `MyST` comments.
    pub comments: bool,
}

impl Default for MystOptions {
    /// Enables every `MyST` toggle.
    fn default() -> Self {
        let enabled = true;
        Self {
            directive_containers: enabled,
            inline_roles: enabled,
            substitution_references: enabled,
            comments: enabled,
        }
    }
}
187
/// Recognition toggles for the `Pandoc`-flavoured extensions.
///
/// All toggles are enabled by default.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
#[allow(
    clippy::struct_excessive_bools,
    reason = "one toggle per Pandoc construct; recognition gates are independent"
)]
pub struct PandocOptions {
    /// Recognise `Pandoc` fenced divs.
    pub fenced_divs: bool,
    /// Recognise `Pandoc` short-form divs.
    pub short_form_divs: bool,
    /// Recognise `Pandoc` inline attribute spans.
    pub inline_attribute_spans: bool,
}

impl Default for PandocOptions {
    /// Enables every `Pandoc` toggle.
    fn default() -> Self {
        let enabled = true;
        Self {
            fenced_divs: enabled,
            short_form_divs: enabled,
            inline_attribute_spans: enabled,
        }
    }
}
209
/// Input-boundary check: reports whether `s` contains a C0 control byte
/// that mdwright takes as evidence the input is not well-formed Markdown.
///
/// Within `0x00..=0x1f` only four bytes are tolerated: TAB (`0x09`),
/// LF (`0x0a`), FF (`0x0c`) and CR (`0x0d`). Any other byte in that range
/// makes the predicate return `true`. DEL (`0x7f`) is deliberately not
/// rejected; `CommonMark` accepts it verbatim and real documents
/// occasionally carry it.
#[must_use]
pub fn contains_rejected_control_chars(s: &str) -> bool {
    // The C0 bytes that are allowed through: TAB, LF, FF, CR.
    const PERMITTED_C0: [u8; 4] = [b'\t', b'\n', 0x0C, b'\r'];

    // Scanning raw bytes is safe for non-ASCII text: every byte of a
    // multi-byte UTF-8 sequence is >= 0x80 and so never looks like C0.
    for &byte in s.as_bytes() {
        let is_c0 = byte < 0x20;
        if is_c0 && !PERMITTED_C0.contains(&byte) {
            return true;
        }
    }
    false
}
222
#[cfg(test)]
mod tests {
    use super::{Document, MathDelimiterSet, MathParseOptions, ParseOptions, contains_rejected_control_chars};

    // Ordinary text, the four permitted C0 bytes (TAB, LF, FF, CR),
    // non-ASCII text and DEL must all pass the predicate.
    #[test]
    fn control_char_predicate_accepts_clean_text() {
        assert!(!contains_rejected_control_chars(""));
        assert!(!contains_rejected_control_chars("# hello\n\nworld\n"));
        assert!(!contains_rejected_control_chars("tab\there\tand\nlf\n"));
        assert!(!contains_rejected_control_chars("ff:\x0c, cr:\r\n"));
        assert!(!contains_rejected_control_chars("café — 한글 — 𝓜"));
        assert!(!contains_rejected_control_chars("del:\x7f"));
    }

    // Every other C0 byte must be rejected.
    #[test]
    fn control_char_predicate_rejects_c0_controls() {
        assert!(contains_rejected_control_chars("nul:\0"));
        assert!(contains_rejected_control_chars("bell:\x07"));
        assert!(contains_rejected_control_chars("unit-sep:\x1f"));
        // VT is the one rejected byte wedged between permitted ones
        // (LF below it, FF above it), so pin it explicitly.
        assert!(contains_rejected_control_chars("vt:\x0b"));
        // Edges of the rejected ranges that sit right next to permitted
        // bytes: 0x08 is just below TAB, 0x0e is just above CR.
        assert!(contains_rejected_control_chars("backspace:\x08"));
        assert!(contains_rejected_control_chars("shift-out:\x0e"));
    }

    // With default options, single-dollar spans produce no math regions.
    #[test]
    fn default_parse_options_do_not_recognize_dollar_math() -> Result<(), Box<dyn std::error::Error>> {
        let doc = Document::parse("x is $a + b$\n")?;
        assert!(doc.math_regions().is_empty());
        Ok(())
    }

    // Opting into the GitHub delimiter set picks up both `$...$` and
    // `$$...$$` spans.
    #[test]
    fn github_math_delimiters_recognize_dollar_math() -> Result<(), Box<dyn std::error::Error>> {
        let opts = ParseOptions::default().with_math(MathParseOptions {
            delimiters: MathDelimiterSet::Github,
        });
        let doc = Document::parse_with_options("x is $a + b$ and $$c + d$$\n", opts)?;
        assert_eq!(doc.math_regions().len(), 2);
        Ok(())
    }
}