Skip to main content

satteri_pulldown_cmark/
lib.rs

1// Copyright 2015 Google Inc. All rights reserved.
2//
3// Permission is hereby granted, free of charge, to any person obtaining a copy
4// of this software and associated documentation files (the "Software"), to deal
5// in the Software without restriction, including without limitation the rights
6// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7// copies of the Software, and to permit persons to whom the Software is
8// furnished to do so, subject to the following conditions:
9//
10// The above copyright notice and this permission notice shall be included in
11// all copies or substantial portions of the Software.
12//
13// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19// THE SOFTWARE.
20
21//! Pull parser for [CommonMark](https://commonmark.org). This crate provides a [Parser](struct.Parser.html) struct
22//! which is an iterator over [Event](enum.Event.html)s. This iterator can be used
23//! directly, or to build an arena representation via [`parse()`].
24//!
25//! By default, only CommonMark features are enabled. To use extensions like tables,
26//! footnotes or task lists, enable them by setting the corresponding flags in the
27//! [Options](struct.Options.html) struct.
28//!
29//! # Example
30//! ```rust
31//! use satteri_pulldown_cmark::{parse, Options};
32//!
33//! let markdown_input = "Hello world, this is a ~~complicated~~ *very simple* example.";
34//!
35//! let mut options = Options::empty();
36//! options.insert(Options::ENABLE_STRIKETHROUGH);
37//! let (arena, _) = parse(markdown_input, options);
38//! let html = satteri_ast::mdast_to_html(&arena);
39//!
40//! let expected_html = "<p>Hello world, this is a <del>complicated</del> <em>very simple</em> example.</p>\n";
41//! assert_eq!(expected_html, &html);
42//! ```
43//!
44//! Note that consecutive text events can happen due to the manner in which the
45//! parser evaluates the source. A utility `TextMergeStream` exists to improve
46//! the comfort of iterating the events:
47//!
48//! ```rust
49//! use satteri_pulldown_cmark::{Event, Parser, TextMergeStream};
50//!
51//! let markdown_input = "Hello world, this is a ~~complicated~~ *very simple* example.";
52//!
53//! let iterator = TextMergeStream::new(Parser::new(markdown_input));
54//!
55//! for event in iterator {
56//!     match event {
57//!         Event::Text(text) => println!("{}", text),
58//!         _ => {}
59//!     }
60//! }
61//! ```
62//!
63#![warn(
64    clippy::alloc_instead_of_core,
65    clippy::std_instead_of_alloc,
66    clippy::std_instead_of_core
67)]
68// Forbid unsafe code unless the SIMD feature is enabled.
69#![cfg_attr(not(feature = "simd"), forbid(unsafe_code))]
70#![warn(missing_debug_implementations)]
71#![cfg_attr(not(feature = "std"), no_std)]
72
73#[macro_use]
74extern crate alloc;
75
76#[cfg(feature = "std")]
77extern crate std;
78
79#[cfg(not(feature = "std"))]
80compile_error!("This crate requires the \"std\" feature.");
81
82use alloc::vec::Vec;
83
84#[cfg(feature = "serde")]
85use serde::{Deserialize, Serialize};
86
87pub mod utils;
88
89pub mod arena_build;
90mod entities;
91mod firstpass;
92mod linklabel;
93mod mdx;
94mod parse;
95mod puncttable;
96mod scanners;
97mod strings;
98mod tree;
99
100use core::fmt::Display;
101
102pub use crate::{
103    arena_build::{parse, DEFAULT_OPTIONS, MDX_OPTIONS},
104    parse::{
105        BrokenLink, BrokenLinkCallback, DefaultParserCallbacks, OffsetIter, Parser,
106        ParserCallbacks, RefDefs,
107    },
108    strings::{CowStr, InlineStr},
109    utils::*,
110};
111
/// The kind of a code block: indented or fenced.
///
/// This is the payload of [`Tag::CodeBlock`].
#[derive(Clone, Debug, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum CodeBlockKind<'a> {
    /// An indented code block; this variant carries no language tag.
    Indented,
    /// A fenced code block.
    ///
    /// The value contained in the tag describes the language of the code, which may be empty.
    #[cfg_attr(feature = "serde", serde(borrow))]
    Fenced(CowStr<'a>),
}
121
122impl<'a> CodeBlockKind<'a> {
123    pub fn is_indented(&self) -> bool {
124        matches!(*self, CodeBlockKind::Indented)
125    }
126
127    pub fn is_fenced(&self) -> bool {
128        matches!(*self, CodeBlockKind::Fenced(_))
129    }
130
131    pub fn into_static(self) -> CodeBlockKind<'static> {
132        match self {
133            CodeBlockKind::Indented => CodeBlockKind::Indented,
134            CodeBlockKind::Fenced(s) => CodeBlockKind::Fenced(s.into_static()),
135        }
136    }
137}
138
/// BlockQuote kind (Note, Tip, Important, Warning, Caution).
///
/// Used as the optional payload of [`Tag::BlockQuote`] and [`TagEnd::BlockQuote`].
/// A block quote opened with a marker line such as `> [!NOTE]` is the documented
/// example of a quote that has a kind.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum BlockQuoteKind {
    /// A note, e.g. `> [!NOTE]`.
    Note,
    /// A tip (presumably the `[!TIP]` marker).
    Tip,
    /// Important information (presumably the `[!IMPORTANT]` marker).
    Important,
    /// A warning (presumably the `[!WARNING]` marker).
    Warning,
    /// A caution (presumably the `[!CAUTION]` marker).
    Caution,
}
149
/// Directive kind.
///
/// Distinguishes the three directive forms enabled by
/// [`Options::ENABLE_CONTAINER_EXTENSIONS`]; see [`Tag::Directive`].
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum DirectiveKind {
    /// A container directive (`:::`).
    Container,
    /// A leaf directive (`::`).
    Leaf,
    /// A text directive (`:`).
    Text,
}
158
/// Metadata block kind.
///
/// Used as the payload of [`Tag::MetadataBlock`] and [`TagEnd::MetadataBlock`].
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum MetadataBlockKind {
    /// YAML-style block: starts with a `---` line and ends with a `---` or `...` line
    /// (see [`Options::ENABLE_YAML_STYLE_METADATA_BLOCKS`]).
    YamlStyle,
    /// Block delimited by `+++` lines at start and end
    /// (see [`Options::ENABLE_PLUSES_DELIMITED_METADATA_BLOCKS`]).
    PlusesStyle,
}
166
/// Tags for elements that can contain other elements.
///
/// A tag is yielded inside [`Event::Start`]; the matching [`Event::End`] carries the
/// corresponding [`TagEnd`] (see [`Tag::to_end`]).
#[derive(Clone, Debug, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum Tag<'a> {
    /// A paragraph of text and other inline elements.
    Paragraph,

    /// A heading, with optional identifier, classes and custom attributes.
    /// The identifier is prefixed with `#` and the last one in the attributes
    /// list is chosen, classes are prefixed with `.` and custom attributes
    /// have no prefix and can optionally have a value (`myattr` or `myattr=myvalue`).
    ///
    /// `id`, `classes` and `attrs` are only parsed and populated with
    /// [`Options::ENABLE_HEADING_ATTRIBUTES`]; they are `None` or empty otherwise.
    Heading {
        level: HeadingLevel,
        id: Option<CowStr<'a>>,
        classes: Vec<CowStr<'a>>,
        /// The first item of the tuple is the attr and second one the value.
        attrs: Vec<(CowStr<'a>, Option<CowStr<'a>>)>,
    },

    /// A block quote.
    ///
    /// The `BlockQuoteKind` is only parsed & populated with [`Options::ENABLE_GFM`], `None` otherwise.
    ///
    /// ```markdown
    /// > regular quote
    ///
    /// > [!NOTE]
    /// > note quote
    /// ```
    // NOTE(review): `Options::ENABLE_GITHUB_ALERTS` is also documented as enabling
    // blockquote alerts; confirm against the parser which flag populates the kind.
    BlockQuote(Option<BlockQuoteKind>),
    /// A code block.
    CodeBlock(CodeBlockKind<'a>),
    /// A directive (container, leaf, or text).
    /// Only parsed and emitted with [`Options::ENABLE_CONTAINER_EXTENSIONS`].
    Directive {
        kind: DirectiveKind,
        name: CowStr<'a>,
        attributes: Vec<(CowStr<'a>, CowStr<'a>)>,
    },

    /// An HTML block.
    ///
    /// A line that begins with some predefined tags (HTML block tags) (see [CommonMark Spec](https://spec.commonmark.org/0.31.2/#html-blocks) for more details) or any tag that is followed only by whitespace.
    ///
    /// Most HTML blocks end on an empty line, though some (e.g. `<pre>`, `<script>` or `<!-- Comments -->`) don't.
    /// ```markdown
    /// <body> Is HTML block even though here is non-whitespace.
    /// Block ends on an empty line.
    ///
    /// <some-random-tag>
    /// This is HTML block.
    ///
    /// <pre> Doesn't end on empty lines.
    ///
    /// This is still the same block.</pre>
    /// ```
    HtmlBlock,

    /// A list. If the list is ordered the first field indicates the number of the first item.
    /// The second field is `true` when the list is tight (no blank lines between items).
    /// Contains only list items.
    List(Option<u64>, bool),
    /// A list item.
    Item,
    /// A footnote definition. The value contained is the footnote's label by which it can
    /// be referred to.
    ///
    /// Only parsed and emitted with [`Options::ENABLE_FOOTNOTES`].
    #[cfg_attr(feature = "serde", serde(borrow))]
    FootnoteDefinition(CowStr<'a>),

    /// A definition list.
    /// Only parsed and emitted with [`Options::ENABLE_DEFINITION_LIST`].
    DefinitionList,
    /// A title (term) inside a definition list.
    /// Only parsed and emitted with [`Options::ENABLE_DEFINITION_LIST`].
    DefinitionListTitle,
    /// A definition inside a definition list.
    /// Only parsed and emitted with [`Options::ENABLE_DEFINITION_LIST`].
    DefinitionListDefinition,

    /// A table. Contains a vector describing the text-alignment for each of its columns.
    /// Only parsed and emitted with [`Options::ENABLE_TABLES`].
    Table(Vec<Alignment>),
    /// A table header. Contains only `TableCell`s. Note that the table body starts immediately
    /// after the closure of the `TableHead` tag. There is no `TableBody` tag.
    /// Only parsed and emitted with [`Options::ENABLE_TABLES`].
    TableHead,
    /// A table row. Used for both header rows and body rows. Contains only `TableCell`s.
    /// Only parsed and emitted with [`Options::ENABLE_TABLES`].
    TableRow,
    /// A table cell.
    /// Only parsed and emitted with [`Options::ENABLE_TABLES`].
    TableCell,

    // span-level tags
    /// [Emphasis](https://spec.commonmark.org/0.31.2/#emphasis-and-strong-emphasis).
    /// ```markdown
    /// half*emph* _strong_ _multi _level__
    /// ```
    Emphasis,
    /// [Strong emphasis](https://spec.commonmark.org/0.31.2/#emphasis-and-strong-emphasis).
    /// ```markdown
    /// half**strong** __strong__ __multi __level____
    /// ```
    Strong,
    /// Only parsed and emitted with [`Options::ENABLE_STRIKETHROUGH`].
    ///
    /// ```markdown
    /// ~strike through~
    /// ```
    Strikethrough,
    /// Only parsed and emitted with [`Options::ENABLE_SUPERSCRIPT`].
    ///
    /// ```markdown
    /// ^superscript^
    /// ```
    Superscript,
    /// Only parsed and emitted with [`Options::ENABLE_SUBSCRIPT`]. If that option is
    /// disabled, `~something~` is parsed as [`Strikethrough`](Self::Strikethrough).
    /// ```markdown
    /// ~subscript~ ~~if also enabled this is strikethrough~~
    /// ```
    Subscript,

    /// A link.
    Link {
        link_type: LinkType,
        dest_url: CowStr<'a>,
        title: CowStr<'a>,
        /// Identifier of reference links, e.g. `world` in the link `[hello][world]`.
        id: CowStr<'a>,
    },

    /// An image. Carries the same fields as [`Tag::Link`]: the link type, the
    /// destination URL, a title and the link identifier.
    Image {
        link_type: LinkType,
        dest_url: CowStr<'a>,
        title: CowStr<'a>,
        /// Identifier of reference links, e.g. `world` in the link `[hello][world]`.
        id: CowStr<'a>,
    },

    /// A metadata block.
    /// Only parsed and emitted with [`Options::ENABLE_YAML_STYLE_METADATA_BLOCKS`]
    /// or [`Options::ENABLE_PLUSES_DELIMITED_METADATA_BLOCKS`].
    MetadataBlock(MetadataBlockKind),

    /// An MDX JSX element (flow-level, i.e. block).
    /// Only parsed and emitted with [`Options::ENABLE_MDX`].
    /// The `CowStr` is the raw JSX tag content (e.g. `Component x={1}`).
    /// ```mdx
    /// <Component x={1}>
    ///   children
    /// </Component>
    /// ```
    #[cfg_attr(feature = "serde", serde(borrow))]
    MdxJsxFlowElement(CowStr<'a>),

    /// An MDX JSX element (text-level, i.e. inline).
    /// Only parsed and emitted with [`Options::ENABLE_MDX`].
    #[cfg_attr(feature = "serde", serde(borrow))]
    MdxJsxTextElement(CowStr<'a>),
}
329
330impl<'a> Tag<'a> {
331    pub fn to_end(&self) -> TagEnd {
332        match self {
333            Tag::Paragraph => TagEnd::Paragraph,
334            Tag::Heading { level, .. } => TagEnd::Heading(*level),
335            Tag::BlockQuote(kind) => TagEnd::BlockQuote(*kind),
336            Tag::CodeBlock(_) => TagEnd::CodeBlock,
337            Tag::Directive { kind, .. } => TagEnd::Directive(*kind),
338            Tag::HtmlBlock => TagEnd::HtmlBlock,
339            Tag::List(number, _) => TagEnd::List(number.is_some()),
340            Tag::Item => TagEnd::Item,
341            Tag::FootnoteDefinition(_) => TagEnd::FootnoteDefinition,
342            Tag::Table(_) => TagEnd::Table,
343            Tag::TableHead => TagEnd::TableHead,
344            Tag::TableRow => TagEnd::TableRow,
345            Tag::TableCell => TagEnd::TableCell,
346            Tag::Subscript => TagEnd::Subscript,
347            Tag::Superscript => TagEnd::Superscript,
348            Tag::Emphasis => TagEnd::Emphasis,
349            Tag::Strong => TagEnd::Strong,
350            Tag::Strikethrough => TagEnd::Strikethrough,
351            Tag::Link { .. } => TagEnd::Link,
352            Tag::Image { .. } => TagEnd::Image,
353            Tag::MetadataBlock(kind) => TagEnd::MetadataBlock(*kind),
354            Tag::DefinitionList => TagEnd::DefinitionList,
355            Tag::DefinitionListTitle => TagEnd::DefinitionListTitle,
356            Tag::DefinitionListDefinition => TagEnd::DefinitionListDefinition,
357            Tag::MdxJsxFlowElement(_) => TagEnd::MdxJsxFlowElement,
358            Tag::MdxJsxTextElement(_) => TagEnd::MdxJsxTextElement,
359        }
360    }
361
362    pub fn into_static(self) -> Tag<'static> {
363        match self {
364            Tag::Paragraph => Tag::Paragraph,
365            Tag::Heading {
366                level,
367                id,
368                classes,
369                attrs,
370            } => Tag::Heading {
371                level,
372                id: id.map(|s| s.into_static()),
373                classes: classes.into_iter().map(|s| s.into_static()).collect(),
374                attrs: attrs
375                    .into_iter()
376                    .map(|(k, v)| (k.into_static(), v.map(|s| s.into_static())))
377                    .collect(),
378            },
379            Tag::BlockQuote(k) => Tag::BlockQuote(k),
380            Tag::CodeBlock(kb) => Tag::CodeBlock(kb.into_static()),
381            Tag::Directive {
382                kind,
383                name,
384                attributes,
385            } => Tag::Directive {
386                kind,
387                name: name.into_static(),
388                attributes: attributes
389                    .into_iter()
390                    .map(|(k, v)| (k.into_static(), v.into_static()))
391                    .collect(),
392            },
393            Tag::HtmlBlock => Tag::HtmlBlock,
394            Tag::List(v, t) => Tag::List(v, t),
395            Tag::Item => Tag::Item,
396            Tag::FootnoteDefinition(a) => Tag::FootnoteDefinition(a.into_static()),
397            Tag::Table(v) => Tag::Table(v),
398            Tag::TableHead => Tag::TableHead,
399            Tag::TableRow => Tag::TableRow,
400            Tag::TableCell => Tag::TableCell,
401            Tag::Emphasis => Tag::Emphasis,
402            Tag::Strong => Tag::Strong,
403            Tag::Strikethrough => Tag::Strikethrough,
404            Tag::Superscript => Tag::Superscript,
405            Tag::Subscript => Tag::Subscript,
406            Tag::Link {
407                link_type,
408                dest_url,
409                title,
410                id,
411            } => Tag::Link {
412                link_type,
413                dest_url: dest_url.into_static(),
414                title: title.into_static(),
415                id: id.into_static(),
416            },
417            Tag::Image {
418                link_type,
419                dest_url,
420                title,
421                id,
422            } => Tag::Image {
423                link_type,
424                dest_url: dest_url.into_static(),
425                title: title.into_static(),
426                id: id.into_static(),
427            },
428            Tag::MetadataBlock(v) => Tag::MetadataBlock(v),
429            Tag::DefinitionList => Tag::DefinitionList,
430            Tag::DefinitionListTitle => Tag::DefinitionListTitle,
431            Tag::DefinitionListDefinition => Tag::DefinitionListDefinition,
432            Tag::MdxJsxFlowElement(s) => Tag::MdxJsxFlowElement(s.into_static()),
433            Tag::MdxJsxTextElement(s) => Tag::MdxJsxTextElement(s.into_static()),
434        }
435    }
436}
437
/// The end of a `Tag`.
///
/// Yielded inside [`Event::End`]. Each variant closes the [`Tag`] variant of the same
/// name; [`Tag::to_end`] produces the matching value. Unlike `Tag`, this type is
/// `Copy` and holds no string data.
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum TagEnd {
    Paragraph,
    /// End of a heading; carries the level of the heading being closed.
    Heading(HeadingLevel),

    /// End of a block quote; carries the same optional kind as the start tag.
    BlockQuote(Option<BlockQuoteKind>),
    CodeBlock,
    /// End of a directive; carries the kind of the directive being closed.
    Directive(DirectiveKind),

    HtmlBlock,

    /// A list, `true` for ordered lists.
    List(bool),
    Item,
    FootnoteDefinition,

    DefinitionList,
    DefinitionListTitle,
    DefinitionListDefinition,

    Table,
    TableHead,
    TableRow,
    TableCell,

    Emphasis,
    Strong,
    Strikethrough,
    Superscript,
    Subscript,

    Link,
    Image,

    /// End of a metadata block; carries the kind of the block being closed.
    MetadataBlock(MetadataBlockKind),

    MdxJsxFlowElement,
    MdxJsxTextElement,
}
479
/// Compile-time check that `TagEnd` is exactly two bytes in size.
/// This is why it's used instead of just using `Tag`.
///
/// The constant's declared type is `[(); 2]` while its value has length
/// `size_of::<TagEnd>()`, so the crate fails to compile if the size is anything
/// other than 2. The check is only compiled on targets with 64-bit pointers.
#[cfg(target_pointer_width = "64")]
const _STATIC_ASSERT_TAG_END_SIZE: [(); 2] = [(); core::mem::size_of::<TagEnd>()];
484
485impl<'a> From<Tag<'a>> for TagEnd {
486    fn from(value: Tag) -> Self {
487        value.to_end()
488    }
489}
490
/// The level of a heading, from `H1` to `H6`.
///
/// The discriminants are the numeric levels (`H1 = 1` through `H6 = 6`).
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum HeadingLevel {
    H1 = 1,
    H2,
    H3,
    H4,
    H5,
    H6,
}

/// Formats the level as a lowercase tag name: `h1` through `h6`.
impl Display for HeadingLevel {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let tag_name = match self {
            Self::H1 => "h1",
            Self::H2 => "h2",
            Self::H3 => "h3",
            Self::H4 => "h4",
            Self::H5 => "h5",
            Self::H6 => "h6",
        };
        // Written verbatim: width/fill options of the formatter are not applied.
        f.write_str(tag_name)
    }
}

/// Error returned when converting a `usize` into a [`HeadingLevel`] fails
/// because the value is not a valid heading level (1 through 6).
///
/// Holds the rejected value.
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)]
pub struct InvalidHeadingLevel(usize);

/// Converts the numbers 1 through 6 into `H1` through `H6`.
impl TryFrom<usize> for HeadingLevel {
    type Error = InvalidHeadingLevel;

    /// Returns the heading level for `value`, or [`InvalidHeadingLevel`] wrapping
    /// `value` when it lies outside `1..=6`.
    fn try_from(value: usize) -> Result<Self, Self::Error> {
        // Index `i` holds the level numbered `i + 1`.
        const LEVELS: [HeadingLevel; 6] = [
            HeadingLevel::H1,
            HeadingLevel::H2,
            HeadingLevel::H3,
            HeadingLevel::H4,
            HeadingLevel::H5,
            HeadingLevel::H6,
        ];

        value
            .checked_sub(1) // rejects 0 without underflowing
            .and_then(|index| LEVELS.get(index).copied())
            .ok_or(InvalidHeadingLevel(value))
    }
}
535
/// The syntactic form a link (or image) was written in.
///
/// Stored in the `link_type` field of [`Tag::Link`] and [`Tag::Image`].
#[derive(Clone, Debug, PartialEq, Copy)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum LinkType {
    /// Inline link like `[foo](bar)`
    Inline,
    /// Reference link like `[foo][bar]`
    Reference,
    /// Reference link whose destination is not in the document, but was resolved
    /// by the broken_link_callback
    ReferenceUnknown,
    /// Collapsed link like `[foo][]`
    Collapsed,
    /// Collapsed link whose destination is not in the document, but was resolved
    /// by the broken_link_callback
    CollapsedUnknown,
    /// Shortcut link like `[foo]`
    Shortcut,
    /// Shortcut link whose destination is not in the document, but was resolved
    /// by the broken_link_callback
    ShortcutUnknown,
    /// Autolink like `<http://foo.bar/baz>`
    Autolink,
    /// Email address in autolink like `<john@example.org>`
    Email,
    /// Wikilink link like `[[foo]]` or `[[foo|bar]]`
    WikiLink {
        /// `true` if the wikilink was piped.
        ///
        /// * `true` - `[[foo|bar]]`
        /// * `false` - `[[foo]]`
        has_pothole: bool,
    },
}

impl LinkType {
    /// Maps `Reference`, `Collapsed` and `Shortcut` to their `*Unknown` counterpart.
    ///
    /// # Panics
    ///
    /// Panics (via `unreachable!`) for every other variant; callers must only pass
    /// one of the three link types listed above.
    fn to_unknown(self) -> Self {
        match self {
            Self::Shortcut => Self::ShortcutUnknown,
            Self::Collapsed => Self::CollapsedUnknown,
            Self::Reference => Self::ReferenceUnknown,
            _ => unreachable!(),
        }
    }
}
579
/// Markdown events that are generated in a preorder traversal of the document
/// tree, with additional `End` events whenever all of an inner node's children
/// have been visited.
#[derive(Clone, Debug, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum Event<'a> {
    /// Start of a tagged element. Events that are yielded after this event
    /// and before its corresponding `End` event are inside this element.
    /// Start and end events are guaranteed to be balanced.
    #[cfg_attr(feature = "serde", serde(borrow))]
    Start(Tag<'a>),
    /// End of a tagged element.
    End(TagEnd),
    /// A text node.
    ///
    /// All text, outside and inside [`Tag`]s.
    #[cfg_attr(feature = "serde", serde(borrow))]
    Text(CowStr<'a>),
    /// An [inline code node](https://spec.commonmark.org/0.31.2/#code-spans).
    ///
    /// ```markdown
    /// `code`
    /// ```
    #[cfg_attr(feature = "serde", serde(borrow))]
    Code(CowStr<'a>),
    /// An inline math environment node.
    /// Requires [`Options::ENABLE_MATH`].
    ///
    /// ```markdown
    /// $math$
    /// ```
    #[cfg_attr(feature = "serde", serde(borrow))]
    InlineMath(CowStr<'a>),
    /// A display math environment node.
    /// Requires [`Options::ENABLE_MATH`].
    ///
    /// ```markdown
    /// $$math$$
    /// ```
    #[cfg_attr(feature = "serde", serde(borrow))]
    DisplayMath(CowStr<'a>),
    /// An HTML node.
    ///
    /// A line of HTML inside [`Tag::HtmlBlock`] includes the line break.
    #[cfg_attr(feature = "serde", serde(borrow))]
    Html(CowStr<'a>),
    /// An [inline HTML node](https://spec.commonmark.org/0.31.2/#raw-html).
    ///
    /// Contains only the tag itself, e.g. `<open-tag>`, `</close-tag>` or `<!-- comment -->`.
    ///
    /// **Note**: Under some conditions HTML can also be parsed as an HTML Block, see [`Tag::HtmlBlock`] for details.
    #[cfg_attr(feature = "serde", serde(borrow))]
    InlineHtml(CowStr<'a>),
    /// A reference to a footnote with given label, defined
    /// by an event with a [`Tag::FootnoteDefinition`] tag. Definitions and references to them may
    /// occur in any order. Only parsed and emitted with [`Options::ENABLE_FOOTNOTES`].
    ///
    /// ```markdown
    /// [^1]
    /// ```
    #[cfg_attr(feature = "serde", serde(borrow))]
    FootnoteReference(CowStr<'a>),
    /// A [soft line break](https://spec.commonmark.org/0.31.2/#soft-line-breaks).
    ///
    /// Any line break that isn't a [`HardBreak`](Self::HardBreak), or the end of e.g. a paragraph.
    SoftBreak,
    /// A [hard line break](https://spec.commonmark.org/0.31.2/#hard-line-breaks).
    ///
    /// A line ending that is either preceded by at least two spaces or `\`.
    ///
    /// ```markdown
    /// hard··
    /// line\
    /// breaks
    /// ```
    /// *`·` is a space*
    HardBreak,
    /// A horizontal ruler.
    ///
    /// ```markdown
    /// ***
    /// ···---
    /// _·_··_····_··
    /// ```
    /// *`·` is any whitespace*
    Rule,
    /// A task list marker, rendered as a checkbox in HTML. Contains `true` when it is checked.
    /// Only parsed and emitted with [`Options::ENABLE_TASKLISTS`].
    /// ```markdown
    /// - [ ] unchecked
    /// - [x] checked
    /// ```
    TaskListMarker(bool),

    /// An MDX flow expression (block-level).
    /// Only parsed and emitted with [`Options::ENABLE_MDX`].
    /// ```mdx
    /// {1 + 1}
    /// ```
    #[cfg_attr(feature = "serde", serde(borrow))]
    MdxFlowExpression(CowStr<'a>),

    /// An MDX text expression (inline).
    /// Only parsed and emitted with [`Options::ENABLE_MDX`].
    /// ```mdx
    /// an {expression} here
    /// ```
    #[cfg_attr(feature = "serde", serde(borrow))]
    MdxTextExpression(CowStr<'a>),

    /// An MDX ESM block (import/export at document level).
    /// Only parsed and emitted with [`Options::ENABLE_MDX`].
    /// ```mdx
    /// import {Chart} from './chart.js'
    /// export const meta = {}
    /// ```
    #[cfg_attr(feature = "serde", serde(borrow))]
    MdxEsm(CowStr<'a>),
}
699
700impl<'a> Event<'a> {
701    pub fn into_static(self) -> Event<'static> {
702        match self {
703            Event::Start(t) => Event::Start(t.into_static()),
704            Event::End(e) => Event::End(e),
705            Event::Text(s) => Event::Text(s.into_static()),
706            Event::Code(s) => Event::Code(s.into_static()),
707            Event::InlineMath(s) => Event::InlineMath(s.into_static()),
708            Event::DisplayMath(s) => Event::DisplayMath(s.into_static()),
709            Event::Html(s) => Event::Html(s.into_static()),
710            Event::InlineHtml(s) => Event::InlineHtml(s.into_static()),
711            Event::FootnoteReference(s) => Event::FootnoteReference(s.into_static()),
712            Event::SoftBreak => Event::SoftBreak,
713            Event::HardBreak => Event::HardBreak,
714            Event::Rule => Event::Rule,
715            Event::TaskListMarker(b) => Event::TaskListMarker(b),
716            Event::MdxFlowExpression(s) => Event::MdxFlowExpression(s.into_static()),
717            Event::MdxTextExpression(s) => Event::MdxTextExpression(s.into_static()),
718            Event::MdxEsm(s) => Event::MdxEsm(s.into_static()),
719        }
720    }
721}
722
/// Table column text alignment.
///
/// [`Tag::Table`] carries one value per column.
#[derive(Copy, Clone, Debug, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum Alignment {
    /// Default text alignment.
    None,
    /// Left-aligned column.
    Left,
    /// Centered column.
    Center,
    /// Right-aligned column.
    Right,
}
733
bitflags::bitflags! {
    /// Option struct containing flags for enabling extra features
    /// that are not part of the CommonMark spec.
    #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
    pub struct Options: u32 {
        /// Tables: enables `Tag::Table`, `Tag::TableHead`, `Tag::TableRow` and `Tag::TableCell`.
        const ENABLE_TABLES = 1 << 1;
        /// GitHub-compatible footnote syntax.
        ///
        /// Footnotes are referenced with the syntax `[^IDENT]`,
        /// and defined with an identifier followed by a colon at top level.
        ///
        /// ---
        ///
        /// ```markdown
        /// Footnote referenced [^1].
        ///
        /// [^1]: footnote defined
        /// ```
        ///
        /// Footnote referenced [^1].
        ///
        /// [^1]: footnote defined
        const ENABLE_FOOTNOTES = 1 << 2;
        /// Strikethrough: enables `Tag::Strikethrough`.
        const ENABLE_STRIKETHROUGH = 1 << 3;
        /// Task lists: enables `Event::TaskListMarker`.
        const ENABLE_TASKLISTS = 1 << 4;
        /// Enables replacement of ASCII punctuation characters with
        /// Unicode ligatures and smart quotes.
        ///
        /// This includes replacing `--` with `–`, `---` with `—`, `...` with `…`,
        /// `"quote"` with `“quote”` (U+201C / U+201D), and `'quote'` with
        /// `‘quote’` (U+2018 / U+2019).
        ///
        /// Equivalent to enabling all of `ENABLE_SMART_QUOTES`,
        /// `ENABLE_SMART_DASHES`, and `ENABLE_SMART_ELLIPSES`.
        const ENABLE_SMART_PUNCTUATION = 1 << 5;
        /// Replace straight quotes (`"`, `'`) with curly/smart quotes.
        const ENABLE_SMART_QUOTES = 1 << 18;
        /// Replace `--` with en-dash and `---` with em-dash.
        const ENABLE_SMART_DASHES = 1 << 19;
        /// Replace `...` with ellipsis (`…`).
        const ENABLE_SMART_ELLIPSES = 1 << 20;
        /// Extension to allow headings to have ID and classes.
        ///
        /// `# text { #id .class1 .class2 myattr other_attr=myvalue }`
        /// is interpreted as a level 1 heading
        /// with the content `text`, ID `id`, classes `class1` and `class2` and
        /// custom attributes `myattr` (without value) and
        /// `other_attr` with value `myvalue`.
        /// Note that ID, classes, and custom attributes should be space-separated.
        const ENABLE_HEADING_ATTRIBUTES = 1 << 6;
        /// Metadata blocks in YAML style, i.e.:
        /// - starting with a `---` line
        /// - ending with a `---` or `...` line
        const ENABLE_YAML_STYLE_METADATA_BLOCKS = 1 << 7;
        /// Metadata blocks delimited by:
        /// - `+++` line at start
        /// - `+++` line at end
        const ENABLE_PLUSES_DELIMITED_METADATA_BLOCKS = 1 << 8;
        /// With this feature enabled, two events `Event::InlineMath` and `Event::DisplayMath`
        /// are emitted that conventionally contain TeX formulas.
        const ENABLE_MATH = 1 << 10;
        /// Misc GitHub Flavored Markdown features not supported in CommonMark.
        const ENABLE_GFM = 1 << 11;
        /// GitHub-style blockquote alerts (`[!NOTE]`, `[!TIP]`, `[!IMPORTANT]`, `[!WARNING]`, `[!CAUTION]`).
        /// Not part of the GFM spec — this is a GitHub-specific feature.
        const ENABLE_GITHUB_ALERTS = 1 << 21;
        /// Commonmark-HS-Extensions compatible definition lists.
        ///
        /// ```markdown
        /// title 1
        ///   : definition 1
        ///
        /// title 2
        ///   : definition 2a
        ///   : definition 2b
        /// ```
        const ENABLE_DEFINITION_LIST = 1 << 12;
        /// Superscript: enables `Tag::Superscript` (`^superscript^`).
        const ENABLE_SUPERSCRIPT = 1 << 13;
        /// Subscript: enables `Tag::Subscript` (`~subscript~`).
        const ENABLE_SUBSCRIPT = 1 << 14;
        /// Obsidian-style Wikilinks.
        const ENABLE_WIKILINKS = 1 << 15;
        /// Directives: container (:::), leaf (::), and text (:) directives.
        const ENABLE_CONTAINER_EXTENSIONS = 1 << 16;
        /// MDX: enables JSX elements, expressions, and ESM import/export.
        const ENABLE_MDX = 1 << 17;
    }
}
820
821impl Options {
822    pub(crate) fn has_smart_quotes(&self) -> bool {
823        self.contains(Options::ENABLE_SMART_PUNCTUATION)
824            || self.contains(Options::ENABLE_SMART_QUOTES)
825    }
826
827    pub(crate) fn has_smart_dashes(&self) -> bool {
828        self.contains(Options::ENABLE_SMART_PUNCTUATION)
829            || self.contains(Options::ENABLE_SMART_DASHES)
830    }
831
832    pub(crate) fn has_smart_ellipses(&self) -> bool {
833        self.contains(Options::ENABLE_SMART_PUNCTUATION)
834            || self.contains(Options::ENABLE_SMART_ELLIPSES)
835    }
836}