Skip to main content

satteri_pulldown_cmark/
lib.rs

1// Copyright 2015 Google Inc. All rights reserved.
2//
3// Permission is hereby granted, free of charge, to any person obtaining a copy
4// of this software and associated documentation files (the "Software"), to deal
5// in the Software without restriction, including without limitation the rights
6// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7// copies of the Software, and to permit persons to whom the Software is
8// furnished to do so, subject to the following conditions:
9//
10// The above copyright notice and this permission notice shall be included in
11// all copies or substantial portions of the Software.
12//
13// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19// THE SOFTWARE.
20
21//! Pull parser for [CommonMark](https://commonmark.org). This crate provides a [Parser](struct.Parser.html) struct
22//! which is an iterator over [Event](enum.Event.html)s. This iterator can be used
23//! directly, or to build an arena representation via [`parse()`].
24//!
25//! By default, only CommonMark features are enabled. To use extensions like tables,
26//! footnotes or task lists, enable them by setting the corresponding flags in the
27//! [Options](struct.Options.html) struct.
28//!
29//! # Example
30//! ```rust
31//! use satteri_pulldown_cmark::{parse, Options};
32//!
33//! let markdown_input = "Hello world, this is a ~~complicated~~ *very simple* example.";
34//!
35//! let mut options = Options::empty();
36//! options.insert(Options::ENABLE_STRIKETHROUGH);
37//! let (arena, _) = parse(markdown_input, options);
38//! let html = satteri_ast::mdast_to_html(&arena);
39//!
40//! let expected_html = "<p>Hello world, this is a <del>complicated</del> <em>very simple</em> example.</p>\n";
41//! assert_eq!(expected_html, &html);
42//! ```
43//!
44//! Note that consecutive text events can happen due to the manner in which the
45//! parser evaluates the source. A utility `TextMergeStream` exists to improve
46//! the comfort of iterating the events:
47//!
48//! ```rust
49//! use satteri_pulldown_cmark::{Event, Parser, TextMergeStream};
50//!
51//! let markdown_input = "Hello world, this is a ~~complicated~~ *very simple* example.";
52//!
53//! let iterator = TextMergeStream::new(Parser::new(markdown_input));
54//!
55//! for event in iterator {
56//!     match event {
57//!         Event::Text(text) => println!("{}", text),
58//!         _ => {}
59//!     }
60//! }
61//! ```
62//!
63#![warn(
64    clippy::alloc_instead_of_core,
65    clippy::std_instead_of_alloc,
66    clippy::std_instead_of_core
67)]
68// Forbid unsafe code unless the SIMD feature is enabled.
69#![cfg_attr(not(feature = "simd"), forbid(unsafe_code))]
70#![warn(missing_debug_implementations)]
71#![cfg_attr(not(feature = "std"), no_std)]
72
73#[macro_use]
74extern crate alloc;
75
76#[cfg(feature = "std")]
77extern crate std;
78
79#[cfg(not(feature = "std"))]
80compile_error!("This crate requires the \"std\" feature.");
81
82use alloc::vec::Vec;
83
84#[cfg(feature = "serde")]
85use serde::{Deserialize, Serialize};
86
87pub mod utils;
88
89pub mod arena_build;
90mod entities;
91mod firstpass;
92mod linklabel;
93#[cfg(feature = "mdx")]
94mod mdx;
95mod parse;
96pub(crate) mod post_passes;
97mod puncttable;
98mod scanners;
99mod strings;
100mod tree;
101
102use core::fmt::Display;
103
104#[cfg(feature = "mdx")]
105pub use crate::arena_build::MDX_OPTIONS;
106pub use crate::{
107    arena_build::{parse, DEFAULT_OPTIONS},
108    parse::{
109        BrokenLink, BrokenLinkCallback, DefaultParserCallbacks, OffsetIter, Parser,
110        ParserCallbacks, RefDefs,
111    },
112    strings::{CowStr, InlineStr},
113    utils::*,
114};
115
/// Codeblock kind.
///
/// Payload of [`Tag::CodeBlock`]; tells the two code block forms apart.
#[derive(Clone, Debug, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum CodeBlockKind<'a> {
    /// The indented form; carries no payload.
    Indented,
    /// The value contained in the tag describes the language of the code, which may be empty.
    #[cfg_attr(feature = "serde", serde(borrow))]
    Fenced(CowStr<'a>),
}
125
126impl<'a> CodeBlockKind<'a> {
127    pub fn is_indented(&self) -> bool {
128        matches!(*self, CodeBlockKind::Indented)
129    }
130
131    pub fn is_fenced(&self) -> bool {
132        matches!(*self, CodeBlockKind::Fenced(_))
133    }
134
135    pub fn into_static(self) -> CodeBlockKind<'static> {
136        match self {
137            CodeBlockKind::Indented => CodeBlockKind::Indented,
138            CodeBlockKind::Fenced(s) => CodeBlockKind::Fenced(s.into_static()),
139        }
140    }
141}
142
/// BlockQuote kind (Note, Tip, Important, Warning, Caution).
///
/// Carried as the optional payload of [`Tag::BlockQuote`] and
/// [`TagEnd::BlockQuote`]; a regular quote has no kind.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum BlockQuoteKind {
    /// A `[!NOTE]` quote.
    Note,
    /// A `[!TIP]` quote.
    Tip,
    /// A `[!IMPORTANT]` quote.
    Important,
    /// A `[!WARNING]` quote.
    Warning,
    /// A `[!CAUTION]` quote.
    Caution,
}
153
/// Directive kind.
///
/// Payload of [`Tag::Directive`] and [`TagEnd::Directive`].
/// See [`Options::ENABLE_DIRECTIVE`].
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum DirectiveKind {
    /// A container directive (`:::`).
    Container,
    /// A leaf directive (`::`).
    Leaf,
    /// A text directive (`:`).
    Text,
}
162
/// Metadata block kind.
///
/// Payload of [`Tag::MetadataBlock`] and [`TagEnd::MetadataBlock`].
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum MetadataBlockKind {
    /// YAML-style block, see [`Options::ENABLE_YAML_STYLE_METADATA_BLOCKS`].
    YamlStyle,
    /// `+++`-delimited block, see [`Options::ENABLE_PLUSES_DELIMITED_METADATA_BLOCKS`].
    PlusesStyle,
}
170
/// Tags for elements that can contain other elements.
///
/// A `Tag` is the payload of [`Event::Start`]; the matching [`Event::End`]
/// carries the corresponding [`TagEnd`] (see [`Tag::to_end`]).
#[derive(Clone, Debug, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum Tag<'a> {
    /// A paragraph of text and other inline elements.
    Paragraph,

    /// A heading, with optional identifier, classes and custom attributes.
    /// The identifier is prefixed with `#` and the last one in the attributes
    /// list is chosen, classes are prefixed with `.` and custom attributes
    /// have no prefix and can optionally have a value (`myattr` or `myattr=myvalue`).
    ///
    /// `id`, `classes` and `attrs` are only parsed and populated with [`Options::ENABLE_HEADING_ATTRIBUTES`], `None` or empty otherwise.
    Heading {
        level: HeadingLevel,
        id: Option<CowStr<'a>>,
        classes: Vec<CowStr<'a>>,
        /// The first item of the tuple is the attr and second one the value.
        attrs: Vec<(CowStr<'a>, Option<CowStr<'a>>)>,
    },

    /// A block quote.
    ///
    /// The `BlockQuoteKind` is only parsed & populated with [`Options::ENABLE_GFM`], `None` otherwise.
    ///
    /// NOTE(review): [`Options::ENABLE_GITHUB_ALERTS`] is documented as the flag for
    /// these alert quotes; confirm which flag(s) actually populate the kind.
    ///
    /// ```markdown
    /// > regular quote
    ///
    /// > [!NOTE]
    /// > note quote
    /// ```
    BlockQuote(Option<BlockQuoteKind>),
    /// A code block.
    CodeBlock(CodeBlockKind<'a>),
    /// A directive (container, leaf, or text).
    /// Only parsed and emitted with [`Options::ENABLE_DIRECTIVE`].
    Directive {
        kind: DirectiveKind,
        name: CowStr<'a>,
        attributes: Vec<(CowStr<'a>, CowStr<'a>)>,
    },

    /// An HTML block.
    ///
    /// A line that begins with some predefined tags (HTML block tags) (see [CommonMark Spec](https://spec.commonmark.org/0.31.2/#html-blocks) for more details) or any tag that is followed only by whitespace.
    ///
    /// Most HTML blocks end on an empty line, though some, e.g. `<pre>`, `<script>` or `<!-- Comments -->`, don't.
    /// ```markdown
    /// <body> Is HTML block even though here is non-whitespace.
    /// Block ends on an empty line.
    ///
    /// <some-random-tag>
    /// This is HTML block.
    ///
    /// <pre> Doesn't end on empty lines.
    ///
    /// This is still the same block.</pre>
    /// ```
    HtmlBlock,

    /// A list. If the list is ordered the first field indicates the number of the first item.
    /// The second field is `true` when the list is tight (no blank lines between items).
    /// Contains only list items.
    List(Option<u64>, bool),
    /// A list item.
    Item,
    /// A footnote definition. The value contained is the footnote's label by which it can
    /// be referred to.
    ///
    /// Only parsed and emitted with [`Options::ENABLE_FOOTNOTES`].
    #[cfg_attr(feature = "serde", serde(borrow))]
    FootnoteDefinition(CowStr<'a>),

    /// A definition list.
    /// Only parsed and emitted with [`Options::ENABLE_DEFINITION_LIST`].
    DefinitionList,
    /// A title (term) inside a definition list.
    /// Only parsed and emitted with [`Options::ENABLE_DEFINITION_LIST`].
    DefinitionListTitle,
    /// A definition inside a definition list.
    /// Only parsed and emitted with [`Options::ENABLE_DEFINITION_LIST`].
    DefinitionListDefinition,

    /// A table. Contains a vector describing the text-alignment for each of its columns.
    /// Only parsed and emitted with [`Options::ENABLE_TABLES`].
    Table(Vec<Alignment>),
    /// A table header. Contains only `TableCell`s. Note that the table body starts immediately
    /// after the closure of the `TableHead` tag. There is no `TableBody` tag.
    /// Only parsed and emitted with [`Options::ENABLE_TABLES`].
    TableHead,
    /// A table row. Is used both for header rows as body rows. Contains only `TableCell`s.
    /// Only parsed and emitted with [`Options::ENABLE_TABLES`].
    TableRow,
    /// A table cell.
    /// Only parsed and emitted with [`Options::ENABLE_TABLES`].
    TableCell,

    // span-level tags
    /// [Emphasis](https://spec.commonmark.org/0.31.2/#emphasis-and-strong-emphasis).
    /// ```markdown
    /// half*emph* _strong_ _multi _level__
    /// ```
    Emphasis,
    /// [Strong emphasis](https://spec.commonmark.org/0.31.2/#emphasis-and-strong-emphasis).
    /// ```markdown
    /// half**strong** __strong__ __multi __level____
    /// ```
    Strong,
    /// Struck-through text.
    /// Only parsed and emitted with [`Options::ENABLE_STRIKETHROUGH`].
    ///
    /// ```markdown
    /// ~strike through~
    /// ```
    Strikethrough,
    /// Superscript text.
    /// Only parsed and emitted with [`Options::ENABLE_SUPERSCRIPT`].
    ///
    /// ```markdown
    /// ^superscript^
    /// ```
    Superscript,
    /// Subscript text.
    /// Only parsed and emitted with [`Options::ENABLE_SUBSCRIPT`], if disabled `~something~` is parsed as [`Strikethrough`](Self::Strikethrough).
    /// ```markdown
    /// ~subscript~ ~~if also enabled this is strikethrough~~
    /// ```
    Subscript,

    /// A link.
    Link {
        link_type: LinkType,
        dest_url: CowStr<'a>,
        title: CowStr<'a>,
        /// Identifier of reference links, e.g. `world` in the link `[hello][world]`.
        id: CowStr<'a>,
    },

    /// An image. `link_type` is the link type, `dest_url` the destination URL,
    /// `title` the title and `id` the link identifier.
    Image {
        link_type: LinkType,
        dest_url: CowStr<'a>,
        title: CowStr<'a>,
        /// Identifier of reference links, e.g. `world` in the link `[hello][world]`.
        id: CowStr<'a>,
    },

    /// A metadata block.
    /// Only parsed and emitted with [`Options::ENABLE_YAML_STYLE_METADATA_BLOCKS`]
    /// or [`Options::ENABLE_PLUSES_DELIMITED_METADATA_BLOCKS`].
    MetadataBlock(MetadataBlockKind),

    /// An MDX JSX element (flow-level, i.e. block).
    /// Only parsed and emitted with [`Options::ENABLE_MDX`].
    /// The `CowStr` is the raw JSX tag content (e.g. `Component x={1}`).
    /// ```mdx
    /// <Component x={1}>
    ///   children
    /// </Component>
    /// ```
    #[cfg(feature = "mdx")]
    #[cfg_attr(feature = "serde", serde(borrow))]
    MdxJsxFlowElement(CowStr<'a>),

    /// An MDX JSX element (text-level, i.e. inline).
    /// Only parsed and emitted with [`Options::ENABLE_MDX`].
    #[cfg(feature = "mdx")]
    #[cfg_attr(feature = "serde", serde(borrow))]
    MdxJsxTextElement(CowStr<'a>),
}
335
336impl<'a> Tag<'a> {
337    pub fn to_end(&self) -> TagEnd {
338        match self {
339            Tag::Paragraph => TagEnd::Paragraph,
340            Tag::Heading { level, .. } => TagEnd::Heading(*level),
341            Tag::BlockQuote(kind) => TagEnd::BlockQuote(*kind),
342            Tag::CodeBlock(_) => TagEnd::CodeBlock,
343            Tag::Directive { kind, .. } => TagEnd::Directive(*kind),
344            Tag::HtmlBlock => TagEnd::HtmlBlock,
345            Tag::List(number, _) => TagEnd::List(number.is_some()),
346            Tag::Item => TagEnd::Item,
347            Tag::FootnoteDefinition(_) => TagEnd::FootnoteDefinition,
348            Tag::Table(_) => TagEnd::Table,
349            Tag::TableHead => TagEnd::TableHead,
350            Tag::TableRow => TagEnd::TableRow,
351            Tag::TableCell => TagEnd::TableCell,
352            Tag::Subscript => TagEnd::Subscript,
353            Tag::Superscript => TagEnd::Superscript,
354            Tag::Emphasis => TagEnd::Emphasis,
355            Tag::Strong => TagEnd::Strong,
356            Tag::Strikethrough => TagEnd::Strikethrough,
357            Tag::Link { .. } => TagEnd::Link,
358            Tag::Image { .. } => TagEnd::Image,
359            Tag::MetadataBlock(kind) => TagEnd::MetadataBlock(*kind),
360            Tag::DefinitionList => TagEnd::DefinitionList,
361            Tag::DefinitionListTitle => TagEnd::DefinitionListTitle,
362            Tag::DefinitionListDefinition => TagEnd::DefinitionListDefinition,
363            #[cfg(feature = "mdx")]
364            Tag::MdxJsxFlowElement(_) => TagEnd::MdxJsxFlowElement,
365            #[cfg(feature = "mdx")]
366            Tag::MdxJsxTextElement(_) => TagEnd::MdxJsxTextElement,
367        }
368    }
369
370    pub fn into_static(self) -> Tag<'static> {
371        match self {
372            Tag::Paragraph => Tag::Paragraph,
373            Tag::Heading {
374                level,
375                id,
376                classes,
377                attrs,
378            } => Tag::Heading {
379                level,
380                id: id.map(|s| s.into_static()),
381                classes: classes.into_iter().map(|s| s.into_static()).collect(),
382                attrs: attrs
383                    .into_iter()
384                    .map(|(k, v)| (k.into_static(), v.map(|s| s.into_static())))
385                    .collect(),
386            },
387            Tag::BlockQuote(k) => Tag::BlockQuote(k),
388            Tag::CodeBlock(kb) => Tag::CodeBlock(kb.into_static()),
389            Tag::Directive {
390                kind,
391                name,
392                attributes,
393            } => Tag::Directive {
394                kind,
395                name: name.into_static(),
396                attributes: attributes
397                    .into_iter()
398                    .map(|(k, v)| (k.into_static(), v.into_static()))
399                    .collect(),
400            },
401            Tag::HtmlBlock => Tag::HtmlBlock,
402            Tag::List(v, t) => Tag::List(v, t),
403            Tag::Item => Tag::Item,
404            Tag::FootnoteDefinition(a) => Tag::FootnoteDefinition(a.into_static()),
405            Tag::Table(v) => Tag::Table(v),
406            Tag::TableHead => Tag::TableHead,
407            Tag::TableRow => Tag::TableRow,
408            Tag::TableCell => Tag::TableCell,
409            Tag::Emphasis => Tag::Emphasis,
410            Tag::Strong => Tag::Strong,
411            Tag::Strikethrough => Tag::Strikethrough,
412            Tag::Superscript => Tag::Superscript,
413            Tag::Subscript => Tag::Subscript,
414            Tag::Link {
415                link_type,
416                dest_url,
417                title,
418                id,
419            } => Tag::Link {
420                link_type,
421                dest_url: dest_url.into_static(),
422                title: title.into_static(),
423                id: id.into_static(),
424            },
425            Tag::Image {
426                link_type,
427                dest_url,
428                title,
429                id,
430            } => Tag::Image {
431                link_type,
432                dest_url: dest_url.into_static(),
433                title: title.into_static(),
434                id: id.into_static(),
435            },
436            Tag::MetadataBlock(v) => Tag::MetadataBlock(v),
437            Tag::DefinitionList => Tag::DefinitionList,
438            Tag::DefinitionListTitle => Tag::DefinitionListTitle,
439            Tag::DefinitionListDefinition => Tag::DefinitionListDefinition,
440            #[cfg(feature = "mdx")]
441            Tag::MdxJsxFlowElement(s) => Tag::MdxJsxFlowElement(s.into_static()),
442            #[cfg(feature = "mdx")]
443            Tag::MdxJsxTextElement(s) => Tag::MdxJsxTextElement(s.into_static()),
444        }
445    }
446}
447
/// The end of a `Tag`.
///
/// Payload of [`Event::End`]. Unlike [`Tag`] it holds no strings or vectors,
/// only small `Copy` payloads, so the whole type is `Copy`.
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum TagEnd {
    Paragraph,
    /// End of a heading of the given level.
    Heading(HeadingLevel),

    /// End of a block quote, with the same kind as the opening tag.
    BlockQuote(Option<BlockQuoteKind>),
    CodeBlock,
    /// End of a directive of the given kind.
    Directive(DirectiveKind),

    HtmlBlock,

    /// A list, `true` for ordered lists.
    List(bool),
    Item,
    FootnoteDefinition,

    DefinitionList,
    DefinitionListTitle,
    DefinitionListDefinition,

    Table,
    TableHead,
    TableRow,
    TableCell,

    Emphasis,
    Strong,
    Strikethrough,
    Superscript,
    Subscript,

    Link,
    Image,

    /// End of a metadata block of the given kind.
    MetadataBlock(MetadataBlockKind),

    #[cfg(feature = "mdx")]
    MdxJsxFlowElement,
    #[cfg(feature = "mdx")]
    MdxJsxTextElement,
}
491
/// Make sure `TagEnd` is no more than two bytes in size.
/// This is why it's used instead of just using `Tag`.
///
/// This is a compile-time check: the array type `[(); 2]` only matches the
/// initializer when `size_of::<TagEnd>()` is exactly 2. It is only evaluated
/// on targets with 64-bit pointers.
#[cfg(target_pointer_width = "64")]
const _STATIC_ASSERT_TAG_END_SIZE: [(); 2] = [(); core::mem::size_of::<TagEnd>()];
496
497impl<'a> From<Tag<'a>> for TagEnd {
498    fn from(value: Tag) -> Self {
499        value.to_end()
500    }
501}
502
/// Heading level, `H1` through `H6`.
///
/// The discriminants are explicit and start at 1, so `H1 as usize == 1` and
/// `H6 as usize == 6`.
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum HeadingLevel {
    H1 = 1,
    H2,
    H3,
    H4,
    H5,
    H6,
}
513
514impl Display for HeadingLevel {
515    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
516        match self {
517            Self::H1 => write!(f, "h1"),
518            Self::H2 => write!(f, "h2"),
519            Self::H3 => write!(f, "h3"),
520            Self::H4 => write!(f, "h4"),
521            Self::H5 => write!(f, "h5"),
522            Self::H6 => write!(f, "h6"),
523        }
524    }
525}
526
/// Returned when trying to convert a `usize` into a [`HeadingLevel`] but it fails
/// because the usize isn't a valid heading level.
///
/// The wrapped value is the rejected input.
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)]
pub struct InvalidHeadingLevel(usize);
531
532impl TryFrom<usize> for HeadingLevel {
533    type Error = InvalidHeadingLevel;
534
535    fn try_from(value: usize) -> Result<Self, Self::Error> {
536        match value {
537            1 => Ok(Self::H1),
538            2 => Ok(Self::H2),
539            3 => Ok(Self::H3),
540            4 => Ok(Self::H4),
541            5 => Ok(Self::H5),
542            6 => Ok(Self::H6),
543            _ => Err(InvalidHeadingLevel(value)),
544        }
545    }
546}
547
/// Type specifier for inline links. See [the Tag::Link](enum.Tag.html#variant.Link) for more information.
///
/// Also used as the `link_type` of [`Tag::Image`].
#[derive(Clone, Debug, PartialEq, Copy)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum LinkType {
    /// Inline link like `[foo](bar)`
    Inline,
    /// Reference link like `[foo][bar]`
    Reference,
    /// Reference without destination in the document, but resolved by the broken_link_callback
    ReferenceUnknown,
    /// Collapsed link like `[foo][]`
    Collapsed,
    /// Collapsed link without destination in the document, but resolved by the broken_link_callback
    CollapsedUnknown,
    /// Shortcut link like `[foo]`
    Shortcut,
    /// Shortcut without destination in the document, but resolved by the broken_link_callback
    ShortcutUnknown,
    /// Autolink like `<http://foo.bar/baz>`
    Autolink,
    /// Email address in autolink like `<john@example.org>`
    Email,
    /// Wikilink link like `[[foo]]` or `[[foo|bar]]`
    WikiLink {
        /// `true` if the wikilink was piped.
        ///
        /// * `true` - `[[foo|bar]]`
        /// * `false` - `[[foo]]`
        has_pothole: bool,
    },
}
579
580impl LinkType {
581    /// Map the link type to an equivalent _Unknown link type.
582    fn to_unknown(self) -> Self {
583        match self {
584            LinkType::Reference => LinkType::ReferenceUnknown,
585            LinkType::Collapsed => LinkType::CollapsedUnknown,
586            LinkType::Shortcut => LinkType::ShortcutUnknown,
587            _ => unreachable!(),
588        }
589    }
590}
591
/// Markdown events that are generated in a preorder traversal of the document
/// tree, with additional `End` events whenever all of an inner node's children
/// have been visited.
#[derive(Clone, Debug, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum Event<'a> {
    /// Start of a tagged element. Events that are yielded after this event
    /// and before its corresponding `End` event are inside this element.
    /// Start and end events are guaranteed to be balanced.
    #[cfg_attr(feature = "serde", serde(borrow))]
    Start(Tag<'a>),
    /// End of a tagged element.
    End(TagEnd),
    /// A text node.
    ///
    /// All text, outside and inside [`Tag`]s.
    #[cfg_attr(feature = "serde", serde(borrow))]
    Text(CowStr<'a>),
    /// An [inline code node](https://spec.commonmark.org/0.31.2/#code-spans).
    ///
    /// ```markdown
    /// `code`
    /// ```
    #[cfg_attr(feature = "serde", serde(borrow))]
    Code(CowStr<'a>),
    /// An inline math environment node.
    /// Requires [`Options::ENABLE_MATH`].
    ///
    /// ```markdown
    /// $math$
    /// ```
    #[cfg_attr(feature = "serde", serde(borrow))]
    InlineMath(CowStr<'a>),
    /// A display math environment node.
    /// Requires [`Options::ENABLE_MATH`].
    ///
    /// ```markdown
    /// $$math$$
    /// ```
    #[cfg_attr(feature = "serde", serde(borrow))]
    DisplayMath(CowStr<'a>),
    /// An HTML node.
    ///
    /// A line of HTML inside [`Tag::HtmlBlock`] includes the line break.
    #[cfg_attr(feature = "serde", serde(borrow))]
    Html(CowStr<'a>),
    /// An [inline HTML node](https://spec.commonmark.org/0.31.2/#raw-html).
    ///
    /// Contains only the tag itself, e.g. `<open-tag>`, `</close-tag>` or `<!-- comment -->`.
    ///
    /// **Note**: Under some conditions HTML can also be parsed as an HTML Block, see [`Tag::HtmlBlock`] for details.
    #[cfg_attr(feature = "serde", serde(borrow))]
    InlineHtml(CowStr<'a>),
    /// A reference to a footnote with given label, defined
    /// by an event with a [`Tag::FootnoteDefinition`] tag. Definitions and references to them may
    /// occur in any order. Only parsed and emitted with [`Options::ENABLE_FOOTNOTES`].
    ///
    /// ```markdown
    /// [^1]
    /// ```
    #[cfg_attr(feature = "serde", serde(borrow))]
    FootnoteReference(CowStr<'a>),
    /// A [soft line break](https://spec.commonmark.org/0.31.2/#soft-line-breaks).
    ///
    /// Any line break that isn't a [`HardBreak`](Self::HardBreak), or the end of e.g. a paragraph.
    SoftBreak,
    /// A [hard line break](https://spec.commonmark.org/0.31.2/#hard-line-breaks).
    ///
    /// A line ending that is either preceded by at least two spaces or `\`.
    ///
    /// ```markdown
    /// hard··
    /// line\
    /// breaks
    /// ```
    /// *`·` is a space*
    HardBreak,
    /// A horizontal ruler.
    ///
    /// ```markdown
    /// ***
    /// ···---
    /// _·_··_····_··
    /// ```
    /// *`·` is any whitespace*
    Rule,
    /// A task list marker, rendered as a checkbox in HTML. Contains `true` when it is checked.
    /// Only parsed and emitted with [`Options::ENABLE_TASKLISTS`].
    /// ```markdown
    /// - [ ] unchecked
    /// - [x] checked
    /// ```
    TaskListMarker(bool),

    /// An MDX flow expression (block-level).
    /// Only parsed and emitted with [`Options::ENABLE_MDX`].
    /// ```mdx
    /// {1 + 1}
    /// ```
    #[cfg(feature = "mdx")]
    #[cfg_attr(feature = "serde", serde(borrow))]
    MdxFlowExpression(CowStr<'a>),

    /// An MDX text expression (inline).
    /// Only parsed and emitted with [`Options::ENABLE_MDX`].
    /// ```mdx
    /// an {expression} here
    /// ```
    #[cfg(feature = "mdx")]
    #[cfg_attr(feature = "serde", serde(borrow))]
    MdxTextExpression(CowStr<'a>),

    /// An MDX ESM block (import/export at document level).
    /// Only parsed and emitted with [`Options::ENABLE_MDX`].
    /// ```mdx
    /// import {Chart} from './chart.js'
    /// export const meta = {}
    /// ```
    #[cfg(feature = "mdx")]
    #[cfg_attr(feature = "serde", serde(borrow))]
    MdxEsm(CowStr<'a>),
}
714
715impl<'a> Event<'a> {
716    pub fn into_static(self) -> Event<'static> {
717        match self {
718            Event::Start(t) => Event::Start(t.into_static()),
719            Event::End(e) => Event::End(e),
720            Event::Text(s) => Event::Text(s.into_static()),
721            Event::Code(s) => Event::Code(s.into_static()),
722            Event::InlineMath(s) => Event::InlineMath(s.into_static()),
723            Event::DisplayMath(s) => Event::DisplayMath(s.into_static()),
724            Event::Html(s) => Event::Html(s.into_static()),
725            Event::InlineHtml(s) => Event::InlineHtml(s.into_static()),
726            Event::FootnoteReference(s) => Event::FootnoteReference(s.into_static()),
727            Event::SoftBreak => Event::SoftBreak,
728            Event::HardBreak => Event::HardBreak,
729            Event::Rule => Event::Rule,
730            Event::TaskListMarker(b) => Event::TaskListMarker(b),
731            #[cfg(feature = "mdx")]
732            Event::MdxFlowExpression(s) => Event::MdxFlowExpression(s.into_static()),
733            #[cfg(feature = "mdx")]
734            Event::MdxTextExpression(s) => Event::MdxTextExpression(s.into_static()),
735            #[cfg(feature = "mdx")]
736            Event::MdxEsm(s) => Event::MdxEsm(s.into_static()),
737        }
738    }
739}
740
/// Table column text alignment.
///
/// One value per column is carried by [`Tag::Table`].
#[derive(Copy, Clone, Debug, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum Alignment {
    /// Default text alignment.
    None,
    /// Left-aligned column.
    Left,
    /// Centered column.
    Center,
    /// Right-aligned column.
    Right,
}
751
bitflags::bitflags! {
    /// Option struct containing flags for enabling extra features
    /// that are not part of the CommonMark spec.
    #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
    pub struct Options: u32 {
        /// Tables; gates [`Tag::Table`], [`Tag::TableHead`], [`Tag::TableRow`]
        /// and [`Tag::TableCell`].
        const ENABLE_TABLES = 1 << 1;
        /// GitHub-compatible footnote syntax.
        ///
        /// Footnotes are referenced with the syntax `[^IDENT]`,
        /// and defined with an identifier followed by a colon at top level.
        ///
        /// ---
        ///
        /// ```markdown
        /// Footnote referenced [^1].
        ///
        /// [^1]: footnote defined
        /// ```
        ///
        /// Footnote referenced [^1].
        ///
        /// [^1]: footnote defined
        const ENABLE_FOOTNOTES = 1 << 2;
        /// Strikethrough; gates [`Tag::Strikethrough`].
        const ENABLE_STRIKETHROUGH = 1 << 3;
        /// Task lists; gates [`Event::TaskListMarker`].
        const ENABLE_TASKLISTS = 1 << 4;
        /// Enables replacement of ASCII punctuation characters with
        /// Unicode ligatures and smart quotes.
        ///
        /// This includes replacing `--` with `–`, `---` with `—`, `...` with `…`,
        /// `"quote"` with `\u{201c}quote\u{201d}`, and `'quote'` with `\u{2018}quote\u{2019}`.
        ///
        /// Equivalent to enabling all of `ENABLE_SMART_QUOTES`,
        /// `ENABLE_SMART_DASHES`, and `ENABLE_SMART_ELLIPSES`.
        const ENABLE_SMART_PUNCTUATION = 1 << 5;
        /// Replace straight quotes (`"`, `'`) with curly/smart quotes.
        const ENABLE_SMART_QUOTES = 1 << 18;
        /// Replace `--` with en-dash and `---` with em-dash.
        const ENABLE_SMART_DASHES = 1 << 19;
        /// Replace `...` with ellipsis (`…`).
        const ENABLE_SMART_ELLIPSES = 1 << 20;
        /// Extension to allow headings to have ID and classes.
        ///
        /// `# text { #id .class1 .class2 myattr other_attr=myvalue }`
        /// is interpreted as a level 1 heading
        /// with the content `text`, ID `id`, classes `class1` and `class2` and
        /// custom attributes `myattr` (without value) and
        /// `other_attr` with value `myvalue`.
        /// Note that ID, classes, and custom attributes should be space-separated.
        const ENABLE_HEADING_ATTRIBUTES = 1 << 6;
        /// Metadata blocks in YAML style, i.e.:
        /// - starting with a `---` line
        /// - ending with a `---` or `...` line
        const ENABLE_YAML_STYLE_METADATA_BLOCKS = 1 << 7;
        /// Metadata blocks delimited by:
        /// - `+++` line at start
        /// - `+++` line at end
        const ENABLE_PLUSES_DELIMITED_METADATA_BLOCKS = 1 << 8;
        /// Emits `Event::InlineMath` and `Event::DisplayMath` for TeX formulas.
        /// Umbrella over [`ENABLE_MATH_SINGLE_DOLLAR`](Self::ENABLE_MATH_SINGLE_DOLLAR)
        /// and [`ENABLE_MATH_MULTI_DOLLAR`](Self::ENABLE_MATH_MULTI_DOLLAR).
        const ENABLE_MATH = 1 << 10;
        /// Single-dollar inline math (`$x$`).
        const ENABLE_MATH_SINGLE_DOLLAR = 1 << 22;
        /// Multi-dollar math: inline `$$x$$` and `$$` block fences.
        const ENABLE_MATH_MULTI_DOLLAR = 1 << 23;
        /// Misc GitHub Flavored Markdown features not supported in CommonMark.
        const ENABLE_GFM = 1 << 11;
        /// GitHub-style blockquote alerts (`[!NOTE]`, `[!TIP]`, `[!IMPORTANT]`, `[!WARNING]`, `[!CAUTION]`).
        /// Not part of the GFM spec — this is a GitHub-specific feature.
        const ENABLE_GITHUB_ALERTS = 1 << 21;
        /// Commonmark-HS-Extensions compatible definition lists.
        ///
        /// ```markdown
        /// title 1
        ///   : definition 1
        ///
        /// title 2
        ///   : definition 2a
        ///   : definition 2b
        /// ```
        const ENABLE_DEFINITION_LIST = 1 << 12;
        /// Superscript; gates [`Tag::Superscript`].
        const ENABLE_SUPERSCRIPT = 1 << 13;
        /// Subscript; gates [`Tag::Subscript`].
        const ENABLE_SUBSCRIPT = 1 << 14;
        /// Obsidian-style Wikilinks.
        const ENABLE_WIKILINKS = 1 << 15;
        /// Directives: container (:::), leaf (::), and text (:) directives.
        const ENABLE_DIRECTIVE = 1 << 16;
        /// MDX: enables JSX elements, expressions, and ESM import/export.
        const ENABLE_MDX = 1 << 17;
    }
}
843
844impl Options {
845    pub(crate) fn has_smart_quotes(&self) -> bool {
846        self.contains(Options::ENABLE_SMART_PUNCTUATION)
847            || self.contains(Options::ENABLE_SMART_QUOTES)
848    }
849
850    pub(crate) fn has_smart_dashes(&self) -> bool {
851        self.contains(Options::ENABLE_SMART_PUNCTUATION)
852            || self.contains(Options::ENABLE_SMART_DASHES)
853    }
854
855    pub(crate) fn has_smart_ellipses(&self) -> bool {
856        self.contains(Options::ENABLE_SMART_PUNCTUATION)
857            || self.contains(Options::ENABLE_SMART_ELLIPSES)
858    }
859
860    pub(crate) fn has_math(&self) -> bool {
861        self.intersects(
862            Options::ENABLE_MATH
863                | Options::ENABLE_MATH_SINGLE_DOLLAR
864                | Options::ENABLE_MATH_MULTI_DOLLAR,
865        )
866    }
867}