Skip to main content

satteri_pulldown_cmark/
lib.rs

1// Copyright 2015 Google Inc. All rights reserved.
2//
3// Permission is hereby granted, free of charge, to any person obtaining a copy
4// of this software and associated documentation files (the "Software"), to deal
5// in the Software without restriction, including without limitation the rights
6// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7// copies of the Software, and to permit persons to whom the Software is
8// furnished to do so, subject to the following conditions:
9//
10// The above copyright notice and this permission notice shall be included in
11// all copies or substantial portions of the Software.
12//
13// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19// THE SOFTWARE.
20
21//! Pull parser for [CommonMark](https://commonmark.org). This crate provides a [Parser](struct.Parser.html) struct
22//! which is an iterator over [Event](enum.Event.html)s. This iterator can be used
23//! directly, or to build an arena representation via [`parse()`].
24//!
25//! By default, only CommonMark features are enabled. To use extensions like tables,
26//! footnotes or task lists, enable them by setting the corresponding flags in the
27//! [Options](struct.Options.html) struct.
28//!
29//! # Example
30//! ```rust
31//! use satteri_pulldown_cmark::{parse, Options};
32//!
33//! let markdown_input = "Hello world, this is a ~~complicated~~ *very simple* example.";
34//!
35//! let mut options = Options::empty();
36//! options.insert(Options::ENABLE_STRIKETHROUGH);
37//! let (arena, _) = parse(markdown_input, options);
38//! let html = satteri_ast::mdast_to_html(&arena);
39//!
40//! let expected_html = "<p>Hello world, this is a <del>complicated</del> <em>very simple</em> example.</p>\n";
41//! assert_eq!(expected_html, &html);
42//! ```
43//!
44//! Note that consecutive text events can happen due to the manner in which the
45//! parser evaluates the source. A utility `TextMergeStream` exists to improve
46//! the comfort of iterating the events:
47//!
48//! ```rust
49//! use satteri_pulldown_cmark::{Event, Parser, TextMergeStream};
50//!
51//! let markdown_input = "Hello world, this is a ~~complicated~~ *very simple* example.";
52//!
53//! let iterator = TextMergeStream::new(Parser::new(markdown_input));
54//!
55//! for event in iterator {
56//!     match event {
57//!         Event::Text(text) => println!("{}", text),
58//!         _ => {}
59//!     }
60//! }
61//! ```
62//!
63#![warn(
64    clippy::alloc_instead_of_core,
65    clippy::std_instead_of_alloc,
66    clippy::std_instead_of_core
67)]
68// Forbid unsafe code unless the SIMD feature is enabled.
69#![cfg_attr(not(feature = "simd"), forbid(unsafe_code))]
70#![warn(missing_debug_implementations)]
71#![cfg_attr(not(feature = "std"), no_std)]
72
73#[macro_use]
74extern crate alloc;
75
76#[cfg(feature = "std")]
77extern crate std;
78
79#[cfg(not(feature = "std"))]
80compile_error!("This crate requires the \"std\" feature.");
81
82use alloc::vec::Vec;
83
84#[cfg(feature = "serde")]
85use serde::{Deserialize, Serialize};
86
87pub mod utils;
88
89pub mod arena_build;
90mod entities;
91mod firstpass;
92mod linklabel;
93mod mdx;
94mod parse;
95pub(crate) mod post_passes;
96mod puncttable;
97mod scanners;
98mod strings;
99mod tree;
100
101use core::fmt::Display;
102
103pub use crate::{
104    arena_build::{parse, DEFAULT_OPTIONS, MDX_OPTIONS},
105    parse::{
106        BrokenLink, BrokenLinkCallback, DefaultParserCallbacks, OffsetIter, Parser,
107        ParserCallbacks, RefDefs,
108    },
109    strings::{CowStr, InlineStr},
110    utils::*,
111};
112
113/// Codeblock kind.
114#[derive(Clone, Debug, PartialEq)]
115#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
116pub enum CodeBlockKind<'a> {
117    Indented,
118    /// The value contained in the tag describes the language of the code, which may be empty.
119    #[cfg_attr(feature = "serde", serde(borrow))]
120    Fenced(CowStr<'a>),
121}
122
123impl<'a> CodeBlockKind<'a> {
124    pub fn is_indented(&self) -> bool {
125        matches!(*self, CodeBlockKind::Indented)
126    }
127
128    pub fn is_fenced(&self) -> bool {
129        matches!(*self, CodeBlockKind::Fenced(_))
130    }
131
132    pub fn into_static(self) -> CodeBlockKind<'static> {
133        match self {
134            CodeBlockKind::Indented => CodeBlockKind::Indented,
135            CodeBlockKind::Fenced(s) => CodeBlockKind::Fenced(s.into_static()),
136        }
137    }
138}
139
/// The alert flavour attached to a block quote: one of Note, Tip, Important,
/// Warning or Caution.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum BlockQuoteKind {
    /// A `[!NOTE]` block quote.
    Note,
    /// A `[!TIP]` block quote.
    Tip,
    /// An `[!IMPORTANT]` block quote.
    Important,
    /// A `[!WARNING]` block quote.
    Warning,
    /// A `[!CAUTION]` block quote.
    Caution,
}
150
/// The syntactic form of a directive.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum DirectiveKind {
    /// A container directive (`:::`).
    Container,
    /// A leaf directive (`::`).
    Leaf,
    /// A text directive (`:`).
    Text,
}
159
/// The delimiter style of a metadata block.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum MetadataBlockKind {
    /// YAML-style block; see [`Options::ENABLE_YAML_STYLE_METADATA_BLOCKS`].
    YamlStyle,
    /// `+++`-delimited block; see
    /// [`Options::ENABLE_PLUSES_DELIMITED_METADATA_BLOCKS`].
    PlusesStyle,
}
167
168/// Tags for elements that can contain other elements.
169#[derive(Clone, Debug, PartialEq)]
170#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
171pub enum Tag<'a> {
172    /// A paragraph of text and other inline elements.
173    Paragraph,
174
175    /// A heading, with optional identifier, classes and custom attributes.
176    /// The identifier is prefixed with `#` and the last one in the attributes
177    /// list is chosen, classes are prefixed with `.` and custom attributes
178    /// have no prefix and can optionally have a value (`myattr` or `myattr=myvalue`).
179    ///
180    /// `id`, `classes` and `attrs` are only parsed and populated with [`Options::ENABLE_HEADING_ATTRIBUTES`], `None` or empty otherwise.
181    Heading {
182        level: HeadingLevel,
183        id: Option<CowStr<'a>>,
184        classes: Vec<CowStr<'a>>,
185        /// The first item of the tuple is the attr and second one the value.
186        attrs: Vec<(CowStr<'a>, Option<CowStr<'a>>)>,
187    },
188
189    /// A block quote.
190    ///
191    /// The `BlockQuoteKind` is only parsed & populated with [`Options::ENABLE_GFM`], `None` otherwise.
192    ///
193    /// ```markdown
194    /// > regular quote
195    ///
196    /// > [!NOTE]
197    /// > note quote
198    /// ```
199    BlockQuote(Option<BlockQuoteKind>),
200    /// A code block.
201    CodeBlock(CodeBlockKind<'a>),
202    /// A directive (container, leaf, or text).
203    /// Only parsed and emitted with [`Options::ENABLE_DIRECTIVE`].
204    Directive {
205        kind: DirectiveKind,
206        name: CowStr<'a>,
207        attributes: Vec<(CowStr<'a>, CowStr<'a>)>,
208    },
209
210    /// An HTML block.
211    ///
212    /// A line that begins with some predefined tags (HTML block tags) (see [CommonMark Spec](https://spec.commonmark.org/0.31.2/#html-blocks) for more details) or any tag that is followed only by whitespace.
213    ///
214    /// Most HTML blocks end on an empty line, though some e.g. `<pre>` like `<script>` or `<!-- Comments -->` don't.
215    /// ```markdown
216    /// <body> Is HTML block even though here is non-whitespace.
217    /// Block ends on an empty line.
218    ///
219    /// <some-random-tag>
220    /// This is HTML block.
221    ///
222    /// <pre> Doesn't end on empty lines.
223    ///
224    /// This is still the same block.</pre>
225    /// ```
226    HtmlBlock,
227
228    /// A list. If the list is ordered the first field indicates the number of the first item.
229    /// The second field is `true` when the list is tight (no blank lines between items).
230    /// Contains only list items.
231    List(Option<u64>, bool),
232    /// A list item.
233    Item,
234    /// A footnote definition. The value contained is the footnote's label by which it can
235    /// be referred to.
236    ///
237    /// Only parsed and emitted with [`Options::ENABLE_FOOTNOTES`].
238    #[cfg_attr(feature = "serde", serde(borrow))]
239    FootnoteDefinition(CowStr<'a>),
240
241    /// Only parsed and emitted with [`Options::ENABLE_DEFINITION_LIST`].
242    DefinitionList,
243    /// Only parsed and emitted with [`Options::ENABLE_DEFINITION_LIST`].
244    DefinitionListTitle,
245    /// Only parsed and emitted with [`Options::ENABLE_DEFINITION_LIST`].
246    DefinitionListDefinition,
247
248    /// A table. Contains a vector describing the text-alignment for each of its columns.
249    /// Only parsed and emitted with [`Options::ENABLE_TABLES`].
250    Table(Vec<Alignment>),
251    /// A table header. Contains only `TableCell`s. Note that the table body starts immediately
252    /// after the closure of the `TableHead` tag. There is no `TableBody` tag.
253    /// Only parsed and emitted with [`Options::ENABLE_TABLES`].
254    TableHead,
255    /// A table row. Is used both for header rows as body rows. Contains only `TableCell`s.
256    /// Only parsed and emitted with [`Options::ENABLE_TABLES`].
257    TableRow,
258    /// Only parsed and emitted with [`Options::ENABLE_TABLES`].
259    TableCell,
260
261    // span-level tags
262    /// [Emphasis](https://spec.commonmark.org/0.31.2/#emphasis-and-strong-emphasis).
263    /// ```markdown
264    /// half*emph* _strong_ _multi _level__
265    /// ```
266    Emphasis,
267    /// [Strong emphasis](https://spec.commonmark.org/0.31.2/#emphasis-and-strong-emphasis).
268    /// ```markdown
269    /// half**strong** __strong__ __multi __level____
270    /// ```
271    Strong,
272    /// Only parsed and emitted with [`Options::ENABLE_STRIKETHROUGH`].
273    ///
274    /// ```markdown
275    /// ~strike through~
276    /// ```
277    Strikethrough,
278    /// Only parsed and emitted with [`Options::ENABLE_SUPERSCRIPT`].
279    ///
280    /// ```markdown
281    /// ^superscript^
282    /// ```
283    Superscript,
284    /// Only parsed and emitted with [`Options::ENABLE_SUBSCRIPT`], if disabled `~something~` is parsed as [`Strikethrough`](Self::Strikethrough).
285    /// ```markdown
286    /// ~subscript~ ~~if also enabled this is strikethrough~~
287    /// ```
288    Subscript,
289
290    /// A link.
291    Link {
292        link_type: LinkType,
293        dest_url: CowStr<'a>,
294        title: CowStr<'a>,
295        /// Identifier of reference links, e.g. `world` in the link `[hello][world]`.
296        id: CowStr<'a>,
297    },
298
299    /// An image. The first field is the link type, the second the destination URL and the third is a title,
300    /// the fourth is the link identifier.
301    Image {
302        link_type: LinkType,
303        dest_url: CowStr<'a>,
304        title: CowStr<'a>,
305        /// Identifier of reference links, e.g. `world` in the link `[hello][world]`.
306        id: CowStr<'a>,
307    },
308
309    /// A metadata block.
310    /// Only parsed and emitted with [`Options::ENABLE_YAML_STYLE_METADATA_BLOCKS`]
311    /// or [`Options::ENABLE_PLUSES_DELIMITED_METADATA_BLOCKS`].
312    MetadataBlock(MetadataBlockKind),
313
314    /// An MDX JSX element (flow-level, i.e. block).
315    /// Only parsed and emitted with [`Options::ENABLE_MDX`].
316    /// The `CowStr` is the raw JSX tag content (e.g. `Component x={1}`).
317    /// ```mdx
318    /// <Component x={1}>
319    ///   children
320    /// </Component>
321    /// ```
322    #[cfg_attr(feature = "serde", serde(borrow))]
323    MdxJsxFlowElement(CowStr<'a>),
324
325    /// An MDX JSX element (text-level, i.e. inline).
326    /// Only parsed and emitted with [`Options::ENABLE_MDX`].
327    #[cfg_attr(feature = "serde", serde(borrow))]
328    MdxJsxTextElement(CowStr<'a>),
329}
330
331impl<'a> Tag<'a> {
332    pub fn to_end(&self) -> TagEnd {
333        match self {
334            Tag::Paragraph => TagEnd::Paragraph,
335            Tag::Heading { level, .. } => TagEnd::Heading(*level),
336            Tag::BlockQuote(kind) => TagEnd::BlockQuote(*kind),
337            Tag::CodeBlock(_) => TagEnd::CodeBlock,
338            Tag::Directive { kind, .. } => TagEnd::Directive(*kind),
339            Tag::HtmlBlock => TagEnd::HtmlBlock,
340            Tag::List(number, _) => TagEnd::List(number.is_some()),
341            Tag::Item => TagEnd::Item,
342            Tag::FootnoteDefinition(_) => TagEnd::FootnoteDefinition,
343            Tag::Table(_) => TagEnd::Table,
344            Tag::TableHead => TagEnd::TableHead,
345            Tag::TableRow => TagEnd::TableRow,
346            Tag::TableCell => TagEnd::TableCell,
347            Tag::Subscript => TagEnd::Subscript,
348            Tag::Superscript => TagEnd::Superscript,
349            Tag::Emphasis => TagEnd::Emphasis,
350            Tag::Strong => TagEnd::Strong,
351            Tag::Strikethrough => TagEnd::Strikethrough,
352            Tag::Link { .. } => TagEnd::Link,
353            Tag::Image { .. } => TagEnd::Image,
354            Tag::MetadataBlock(kind) => TagEnd::MetadataBlock(*kind),
355            Tag::DefinitionList => TagEnd::DefinitionList,
356            Tag::DefinitionListTitle => TagEnd::DefinitionListTitle,
357            Tag::DefinitionListDefinition => TagEnd::DefinitionListDefinition,
358            Tag::MdxJsxFlowElement(_) => TagEnd::MdxJsxFlowElement,
359            Tag::MdxJsxTextElement(_) => TagEnd::MdxJsxTextElement,
360        }
361    }
362
363    pub fn into_static(self) -> Tag<'static> {
364        match self {
365            Tag::Paragraph => Tag::Paragraph,
366            Tag::Heading {
367                level,
368                id,
369                classes,
370                attrs,
371            } => Tag::Heading {
372                level,
373                id: id.map(|s| s.into_static()),
374                classes: classes.into_iter().map(|s| s.into_static()).collect(),
375                attrs: attrs
376                    .into_iter()
377                    .map(|(k, v)| (k.into_static(), v.map(|s| s.into_static())))
378                    .collect(),
379            },
380            Tag::BlockQuote(k) => Tag::BlockQuote(k),
381            Tag::CodeBlock(kb) => Tag::CodeBlock(kb.into_static()),
382            Tag::Directive {
383                kind,
384                name,
385                attributes,
386            } => Tag::Directive {
387                kind,
388                name: name.into_static(),
389                attributes: attributes
390                    .into_iter()
391                    .map(|(k, v)| (k.into_static(), v.into_static()))
392                    .collect(),
393            },
394            Tag::HtmlBlock => Tag::HtmlBlock,
395            Tag::List(v, t) => Tag::List(v, t),
396            Tag::Item => Tag::Item,
397            Tag::FootnoteDefinition(a) => Tag::FootnoteDefinition(a.into_static()),
398            Tag::Table(v) => Tag::Table(v),
399            Tag::TableHead => Tag::TableHead,
400            Tag::TableRow => Tag::TableRow,
401            Tag::TableCell => Tag::TableCell,
402            Tag::Emphasis => Tag::Emphasis,
403            Tag::Strong => Tag::Strong,
404            Tag::Strikethrough => Tag::Strikethrough,
405            Tag::Superscript => Tag::Superscript,
406            Tag::Subscript => Tag::Subscript,
407            Tag::Link {
408                link_type,
409                dest_url,
410                title,
411                id,
412            } => Tag::Link {
413                link_type,
414                dest_url: dest_url.into_static(),
415                title: title.into_static(),
416                id: id.into_static(),
417            },
418            Tag::Image {
419                link_type,
420                dest_url,
421                title,
422                id,
423            } => Tag::Image {
424                link_type,
425                dest_url: dest_url.into_static(),
426                title: title.into_static(),
427                id: id.into_static(),
428            },
429            Tag::MetadataBlock(v) => Tag::MetadataBlock(v),
430            Tag::DefinitionList => Tag::DefinitionList,
431            Tag::DefinitionListTitle => Tag::DefinitionListTitle,
432            Tag::DefinitionListDefinition => Tag::DefinitionListDefinition,
433            Tag::MdxJsxFlowElement(s) => Tag::MdxJsxFlowElement(s.into_static()),
434            Tag::MdxJsxTextElement(s) => Tag::MdxJsxTextElement(s.into_static()),
435        }
436    }
437}
438
439/// The end of a `Tag`.
440#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)]
441#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
442pub enum TagEnd {
443    Paragraph,
444    Heading(HeadingLevel),
445
446    BlockQuote(Option<BlockQuoteKind>),
447    CodeBlock,
448    Directive(DirectiveKind),
449
450    HtmlBlock,
451
452    /// A list, `true` for ordered lists.
453    List(bool),
454    Item,
455    FootnoteDefinition,
456
457    DefinitionList,
458    DefinitionListTitle,
459    DefinitionListDefinition,
460
461    Table,
462    TableHead,
463    TableRow,
464    TableCell,
465
466    Emphasis,
467    Strong,
468    Strikethrough,
469    Superscript,
470    Subscript,
471
472    Link,
473    Image,
474
475    MetadataBlock(MetadataBlockKind),
476
477    MdxJsxFlowElement,
478    MdxJsxTextElement,
479}
480
481/// Make sure `TagEnd` is no more than two bytes in size.
482/// This is why it's used instead of just using `Tag`.
483#[cfg(target_pointer_width = "64")]
484const _STATIC_ASSERT_TAG_END_SIZE: [(); 2] = [(); core::mem::size_of::<TagEnd>()];
485
486impl<'a> From<Tag<'a>> for TagEnd {
487    fn from(value: Tag) -> Self {
488        value.to_end()
489    }
490}
491
/// The level of a heading, `H1` through `H6`.
///
/// Each variant's discriminant equals its level number, and variants order
/// from `H1` (lowest) to `H6` (highest).
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum HeadingLevel {
    /// Level 1.
    H1 = 1,
    /// Level 2.
    H2 = 2,
    /// Level 3.
    H3 = 3,
    /// Level 4.
    H4 = 4,
    /// Level 5.
    H5 = 5,
    /// Level 6.
    H6 = 6,
}
502
503impl Display for HeadingLevel {
504    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
505        match self {
506            Self::H1 => write!(f, "h1"),
507            Self::H2 => write!(f, "h2"),
508            Self::H3 => write!(f, "h3"),
509            Self::H4 => write!(f, "h4"),
510            Self::H5 => write!(f, "h5"),
511            Self::H6 => write!(f, "h6"),
512        }
513    }
514}
515
/// Returned when trying to convert a `usize` into a [`HeadingLevel`] fails
/// because the `usize` isn't a valid heading level.
///
/// The wrapped value is the rejected input.
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)]
pub struct InvalidHeadingLevel(usize);

impl core::fmt::Display for InvalidHeadingLevel {
    /// Writes a short human-readable message that includes the rejected value.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        write!(f, "invalid heading level: {}", self.0)
    }
}
520
521impl TryFrom<usize> for HeadingLevel {
522    type Error = InvalidHeadingLevel;
523
524    fn try_from(value: usize) -> Result<Self, Self::Error> {
525        match value {
526            1 => Ok(Self::H1),
527            2 => Ok(Self::H2),
528            3 => Ok(Self::H3),
529            4 => Ok(Self::H4),
530            5 => Ok(Self::H5),
531            6 => Ok(Self::H6),
532            _ => Err(InvalidHeadingLevel(value)),
533        }
534    }
535}
536
/// Type specifier for inline links. See [the Tag::Link](enum.Tag.html#variant.Link) for more information.
///
/// Equality on this type is total, so it derives `Eq` and `Hash` alongside
/// `PartialEq` and can be used as a key in hashed collections.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum LinkType {
    /// Inline link like `[foo](bar)`
    Inline,
    /// Reference link like `[foo][bar]`
    Reference,
    /// Reference without destination in the document, but resolved by the broken_link_callback
    ReferenceUnknown,
    /// Collapsed link like `[foo][]`
    Collapsed,
    /// Collapsed link without destination in the document, but resolved by the broken_link_callback
    CollapsedUnknown,
    /// Shortcut link like `[foo]`
    Shortcut,
    /// Shortcut without destination in the document, but resolved by the broken_link_callback
    ShortcutUnknown,
    /// Autolink like `<http://foo.bar/baz>`
    Autolink,
    /// Email address in autolink like `<john@example.org>`
    Email,
    /// Wikilink link like `[[foo]]` or `[[foo|bar]]`
    WikiLink {
        /// `true` if the wikilink was piped.
        ///
        /// * `true` - `[[foo|bar]]`
        /// * `false` - `[[foo]]`
        has_pothole: bool,
    },
}
568
569impl LinkType {
570    /// Map the link type to an equivalent _Unknown link type.
571    fn to_unknown(self) -> Self {
572        match self {
573            LinkType::Reference => LinkType::ReferenceUnknown,
574            LinkType::Collapsed => LinkType::CollapsedUnknown,
575            LinkType::Shortcut => LinkType::ShortcutUnknown,
576            _ => unreachable!(),
577        }
578    }
579}
580
581/// Markdown events that are generated in a preorder traversal of the document
582/// tree, with additional `End` events whenever all of an inner node's children
583/// have been visited.
584#[derive(Clone, Debug, PartialEq)]
585#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
586pub enum Event<'a> {
587    /// Start of a tagged element. Events that are yielded after this event
588    /// and before its corresponding `End` event are inside this element.
589    /// Start and end events are guaranteed to be balanced.
590    #[cfg_attr(feature = "serde", serde(borrow))]
591    Start(Tag<'a>),
592    /// End of a tagged element.
593    End(TagEnd),
594    /// A text node.
595    ///
596    /// All text, outside and inside [`Tag`]s.
597    #[cfg_attr(feature = "serde", serde(borrow))]
598    Text(CowStr<'a>),
599    /// An [inline code node](https://spec.commonmark.org/0.31.2/#code-spans).
600    ///
601    /// ```markdown
602    /// `code`
603    /// ```
604    #[cfg_attr(feature = "serde", serde(borrow))]
605    Code(CowStr<'a>),
606    /// An inline math environment node.
607    /// Requires [`Options::ENABLE_MATH`].
608    ///
609    /// ```markdown
610    /// $math$
611    /// ```
612    #[cfg_attr(feature = "serde", serde(borrow))]
613    InlineMath(CowStr<'a>),
614    /// A display math environment node.
615    /// Requires [`Options::ENABLE_MATH`].
616    ///
617    /// ```markdown
618    /// $$math$$
619    /// ```
620    #[cfg_attr(feature = "serde", serde(borrow))]
621    DisplayMath(CowStr<'a>),
622    /// An HTML node.
623    ///
624    /// A line of HTML inside [`Tag::HtmlBlock`] includes the line break.
625    #[cfg_attr(feature = "serde", serde(borrow))]
626    Html(CowStr<'a>),
627    /// An [inline HTML node](https://spec.commonmark.org/0.31.2/#raw-html).
628    ///
629    /// Contains only the tag itself, e.g. `<open-tag>`, `</close-tag>` or `<!-- comment -->`.
630    ///
631    /// **Note**: Under some conditions HTML can also be parsed as an HTML Block, see [`Tag::HtmlBlock`] for details.
632    #[cfg_attr(feature = "serde", serde(borrow))]
633    InlineHtml(CowStr<'a>),
634    /// A reference to a footnote with given label, defined
635    /// by an event with a [`Tag::FootnoteDefinition`] tag. Definitions and references to them may
636    /// occur in any order. Only parsed and emitted with [`Options::ENABLE_FOOTNOTES`].
637    ///
638    /// ```markdown
639    /// [^1]
640    /// ```
641    #[cfg_attr(feature = "serde", serde(borrow))]
642    FootnoteReference(CowStr<'a>),
643    /// A [soft line break](https://spec.commonmark.org/0.31.2/#soft-line-breaks).
644    ///
645    /// Any line break that isn't a [`HardBreak`](Self::HardBreak), or the end of e.g. a paragraph.
646    SoftBreak,
647    /// A [hard line break](https://spec.commonmark.org/0.31.2/#hard-line-breaks).
648    ///
649    /// A line ending that is either preceded by at least two spaces or `\`.
650    ///
651    /// ```markdown
652    /// hard··
653    /// line\
654    /// breaks
655    /// ```
656    /// *`·` is a space*
657    HardBreak,
658    /// A horizontal ruler.
659    ///
660    /// ```markdown
661    /// ***
662    /// ···---
663    /// _·_··_····_··
664    /// ```
665    /// *`·` is any whitespace*
666    Rule,
667    /// A task list marker, rendered as a checkbox in HTML. Contains a true when it is checked.
668    /// Only parsed and emitted with [`Options::ENABLE_TASKLISTS`].
669    /// ```markdown
670    /// - [ ] unchecked
671    /// - [x] checked
672    /// ```
673    TaskListMarker(bool),
674
675    /// An MDX flow expression (block-level).
676    /// Only parsed and emitted with [`Options::ENABLE_MDX`].
677    /// ```mdx
678    /// {1 + 1}
679    /// ```
680    #[cfg_attr(feature = "serde", serde(borrow))]
681    MdxFlowExpression(CowStr<'a>),
682
683    /// An MDX text expression (inline).
684    /// Only parsed and emitted with [`Options::ENABLE_MDX`].
685    /// ```mdx
686    /// a]n {expression} here
687    /// ```
688    #[cfg_attr(feature = "serde", serde(borrow))]
689    MdxTextExpression(CowStr<'a>),
690
691    /// An MDX ESM block (import/export at document level).
692    /// Only parsed and emitted with [`Options::ENABLE_MDX`].
693    /// ```mdx
694    /// import {Chart} from './chart.js'
695    /// export const meta = {}
696    /// ```
697    #[cfg_attr(feature = "serde", serde(borrow))]
698    MdxEsm(CowStr<'a>),
699}
700
701impl<'a> Event<'a> {
702    pub fn into_static(self) -> Event<'static> {
703        match self {
704            Event::Start(t) => Event::Start(t.into_static()),
705            Event::End(e) => Event::End(e),
706            Event::Text(s) => Event::Text(s.into_static()),
707            Event::Code(s) => Event::Code(s.into_static()),
708            Event::InlineMath(s) => Event::InlineMath(s.into_static()),
709            Event::DisplayMath(s) => Event::DisplayMath(s.into_static()),
710            Event::Html(s) => Event::Html(s.into_static()),
711            Event::InlineHtml(s) => Event::InlineHtml(s.into_static()),
712            Event::FootnoteReference(s) => Event::FootnoteReference(s.into_static()),
713            Event::SoftBreak => Event::SoftBreak,
714            Event::HardBreak => Event::HardBreak,
715            Event::Rule => Event::Rule,
716            Event::TaskListMarker(b) => Event::TaskListMarker(b),
717            Event::MdxFlowExpression(s) => Event::MdxFlowExpression(s.into_static()),
718            Event::MdxTextExpression(s) => Event::MdxTextExpression(s.into_static()),
719            Event::MdxEsm(s) => Event::MdxEsm(s.into_static()),
720        }
721    }
722}
723
/// Table column text alignment.
///
/// Equality on this fieldless enum is total, so it derives `Eq` and `Hash`
/// alongside `PartialEq`.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum Alignment {
    /// Default text alignment.
    None,
    /// Left-aligned column.
    Left,
    /// Centered column.
    Center,
    /// Right-aligned column.
    Right,
}
734
735bitflags::bitflags! {
736    /// Option struct containing flags for enabling extra features
737    /// that are not part of the CommonMark spec.
738    #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
739    pub struct Options: u32 {
740        const ENABLE_TABLES = 1 << 1;
741        /// GitHub-compatible footnote syntax.
742        ///
743        /// Footnotes are referenced with the syntax `[^IDENT]`,
744        /// and defined with an identifier followed by a colon at top level.
745        ///
746        /// ---
747        ///
748        /// ```markdown
749        /// Footnote referenced [^1].
750        ///
751        /// [^1]: footnote defined
752        /// ```
753        ///
754        /// Footnote referenced [^1].
755        ///
756        /// [^1]: footnote defined
757        const ENABLE_FOOTNOTES = 1 << 2;
758        const ENABLE_STRIKETHROUGH = 1 << 3;
759        const ENABLE_TASKLISTS = 1 << 4;
760        /// Enables replacement of ASCII punctuation characters with
761        /// Unicode ligatures and smart quotes.
762        ///
763        /// This includes replacing `--` with `–`, `---` with `—`, `...` with `…`,
764        /// `”quote”` with `\u{201c}quote\u{201d}`, and `’quote’` with `\u{2018}quote\u{2019}`.
765        ///
766        /// Equivalent to enabling all of `ENABLE_SMART_QUOTES`,
767        /// `ENABLE_SMART_DASHES`, and `ENABLE_SMART_ELLIPSES`.
768        const ENABLE_SMART_PUNCTUATION = 1 << 5;
769        /// Replace straight quotes (`”`, `’`) with curly/smart quotes.
770        const ENABLE_SMART_QUOTES = 1 << 18;
771        /// Replace `--` with en-dash and `---` with em-dash.
772        const ENABLE_SMART_DASHES = 1 << 19;
773        /// Replace `...` with ellipsis (`…`).
774        const ENABLE_SMART_ELLIPSES = 1 << 20;
775        /// Extension to allow headings to have ID and classes.
776        ///
777        /// `# text { #id .class1 .class2 myattr other_attr=myvalue }`
778        /// is interpreted as a level 1 heading
779        /// with the content `text`, ID `id`, classes `class1` and `class2` and
780        /// custom attributes `myattr` (without value) and
781        /// `other_attr` with value `myvalue`.
782        /// Note that ID, classes, and custom attributes should be space-separated.
783        const ENABLE_HEADING_ATTRIBUTES = 1 << 6;
784        /// Metadata blocks in YAML style, i.e.:
785        /// - starting with a `---` line
786        /// - ending with a `---` or `...` line
787        const ENABLE_YAML_STYLE_METADATA_BLOCKS = 1 << 7;
788        /// Metadata blocks delimited by:
789        /// - `+++` line at start
790        /// - `+++` line at end
791        const ENABLE_PLUSES_DELIMITED_METADATA_BLOCKS = 1 << 8;
792        /// With this feature enabled, two events `Event::InlineMath` and `Event::DisplayMath`
793        /// are emitted that conventionally contain TeX formulas.
794        const ENABLE_MATH = 1 << 10;
795        /// Misc GitHub Flavored Markdown features not supported in CommonMark.
796        const ENABLE_GFM = 1 << 11;
797        /// GitHub-style blockquote alerts ([!NOTE], [!TIP], [!IMPORTANT], [!WARNING], [!CAUTION]).
798        /// Not part of the GFM spec — this is a GitHub-specific feature.
799        const ENABLE_GITHUB_ALERTS = 1 << 21;
800        /// Commonmark-HS-Extensions compatible definition lists.
801        ///
802        /// ```markdown
803        /// title 1
804        ///   : definition 1
805        ///
806        /// title 2
807        ///   : definition 2a
808        ///   : definition 2b
809        /// ```
810        const ENABLE_DEFINITION_LIST = 1 << 12;
811        const ENABLE_SUPERSCRIPT = 1 << 13;
812        const ENABLE_SUBSCRIPT = 1 << 14;
813        /// Obsidian-style Wikilinks.
814        const ENABLE_WIKILINKS = 1 << 15;
815        /// Directives: container (:::), leaf (::), and text (:) directives.
816        const ENABLE_DIRECTIVE = 1 << 16;
817        /// MDX: enables JSX elements, expressions, and ESM import/export.
818        const ENABLE_MDX = 1 << 17;
819    }
820}
821
822impl Options {
823    pub(crate) fn has_smart_quotes(&self) -> bool {
824        self.contains(Options::ENABLE_SMART_PUNCTUATION)
825            || self.contains(Options::ENABLE_SMART_QUOTES)
826    }
827
828    pub(crate) fn has_smart_dashes(&self) -> bool {
829        self.contains(Options::ENABLE_SMART_PUNCTUATION)
830            || self.contains(Options::ENABLE_SMART_DASHES)
831    }
832
833    pub(crate) fn has_smart_ellipses(&self) -> bool {
834        self.contains(Options::ENABLE_SMART_PUNCTUATION)
835            || self.contains(Options::ENABLE_SMART_ELLIPSES)
836    }
837}