satteri_pulldown_cmark/lib.rs
1// Copyright 2015 Google Inc. All rights reserved.
2//
3// Permission is hereby granted, free of charge, to any person obtaining a copy
4// of this software and associated documentation files (the "Software"), to deal
5// in the Software without restriction, including without limitation the rights
6// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7// copies of the Software, and to permit persons to whom the Software is
8// furnished to do so, subject to the following conditions:
9//
10// The above copyright notice and this permission notice shall be included in
11// all copies or substantial portions of the Software.
12//
13// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19// THE SOFTWARE.
20
21//! Pull parser for [CommonMark](https://commonmark.org). This crate provides a [Parser](struct.Parser.html) struct
22//! which is an iterator over [Event](enum.Event.html)s. This iterator can be used
23//! directly, or to output HTML using the [HTML module](html/index.html).
24//!
25//! By default, only CommonMark features are enabled. To use extensions like tables,
26//! footnotes or task lists, enable them by setting the corresponding flags in the
27//! [Options](struct.Options.html) struct.
28//!
29//! # Example
30//! ```rust
31//! use satteri_pulldown_cmark::{Parser, Options};
32//!
33//! let markdown_input = "Hello world, this is a ~~complicated~~ *very simple* example.";
34//!
35//! // Set up options and parser. Strikethroughs are not part of the CommonMark standard
36//! // and we therefore must enable it explicitly.
37//! let mut options = Options::empty();
38//! options.insert(Options::ENABLE_STRIKETHROUGH);
39//! let parser = Parser::new_ext(markdown_input, options);
40//!
41//! # #[cfg(feature = "html")] {
42//! // Write to String buffer.
43//! let mut html_output = String::new();
44//! satteri_pulldown_cmark::html::push_html(&mut html_output, parser);
45//!
46//! // Check that the output is what we expected.
47//! let expected_html = "<p>Hello world, this is a <del>complicated</del> <em>very simple</em> example.</p>\n";
48//! assert_eq!(expected_html, &html_output);
49//! # }
50//! ```
51//!
52//! Note that consecutive text events can happen due to the manner in which the
53//! parser evaluates the source. A utility `TextMergeStream` exists to improve
54//! the comfort of iterating the events:
55//!
56//! ```rust
57//! use satteri_pulldown_cmark::{Event, Parser, TextMergeStream};
58//!
59//! let markdown_input = "Hello world, this is a ~~complicated~~ *very simple* example.";
60//!
61//! let iterator = TextMergeStream::new(Parser::new(markdown_input));
62//!
63//! for event in iterator {
64//! match event {
65//! Event::Text(text) => println!("{}", text),
66//! _ => {}
67//! }
68//! }
69//! ```
70//!
71#![warn(
72 clippy::alloc_instead_of_core,
73 clippy::std_instead_of_alloc,
74 clippy::std_instead_of_core
75)]
76// Forbid unsafe code unless the SIMD feature is enabled.
77#![cfg_attr(not(feature = "simd"), forbid(unsafe_code))]
78#![warn(missing_debug_implementations)]
79#![cfg_attr(not(feature = "std"), no_std)]
80
81#[macro_use]
82extern crate alloc;
83
84#[cfg(feature = "std")]
85extern crate std;
86
87#[cfg(not(feature = "std"))]
88compile_error!("This crate requires the \"std\" feature.");
89
90use alloc::vec::Vec;
91
92#[cfg(feature = "serde")]
93use serde::{Deserialize, Serialize};
94
95#[cfg(feature = "html")]
96pub mod html;
97
98pub mod utils;
99
100pub mod arena_build;
101mod entities;
102mod firstpass;
103mod linklabel;
104mod mdx;
105mod parse;
106mod puncttable;
107mod scanners;
108mod strings;
109mod tree;
110
111use core::fmt::Display;
112
113pub use crate::{
114 arena_build::{parse, DEFAULT_OPTIONS, MDX_OPTIONS},
115 parse::{
116 BrokenLink, BrokenLinkCallback, DefaultParserCallbacks, OffsetIter, Parser,
117 ParserCallbacks, RefDefs,
118 },
119 strings::{CowStr, InlineStr},
120 utils::*,
121};
122
/// Codeblock kind: either indented or fenced.
#[derive(Clone, Debug, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum CodeBlockKind<'a> {
    /// An indented code block. Unlike [`CodeBlockKind::Fenced`], it carries no value.
    Indented,
    /// The value contained in the tag describes the language of the code, which may be empty.
    #[cfg_attr(feature = "serde", serde(borrow))]
    Fenced(CowStr<'a>),
}
132
133impl<'a> CodeBlockKind<'a> {
134 pub fn is_indented(&self) -> bool {
135 matches!(*self, CodeBlockKind::Indented)
136 }
137
138 pub fn is_fenced(&self) -> bool {
139 matches!(*self, CodeBlockKind::Fenced(_))
140 }
141
142 pub fn into_static(self) -> CodeBlockKind<'static> {
143 match self {
144 CodeBlockKind::Indented => CodeBlockKind::Indented,
145 CodeBlockKind::Fenced(s) => CodeBlockKind::Fenced(s.into_static()),
146 }
147 }
148}
149
/// BlockQuote kind (Note, Tip, Important, Warning, Caution).
///
/// The variants correspond to the `[!NOTE]`, `[!TIP]`, `[!IMPORTANT]`, `[!WARNING]`
/// and `[!CAUTION]` block quote tags listed under [`Options::ENABLE_GFM`].
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum BlockQuoteKind {
    /// A `[!NOTE]` block quote.
    Note,
    /// A `[!TIP]` block quote.
    Tip,
    /// A `[!IMPORTANT]` block quote.
    Important,
    /// A `[!WARNING]` block quote.
    Warning,
    /// A `[!CAUTION]` block quote.
    Caution,
}
160
/// Kind of a [`Tag::ContainerBlock`]: either `Default` or `Spoiler`.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum ContainerKind {
    /// The default container kind.
    Default,
    /// A spoiler container.
    Spoiler,
}
168
/// Metadata block kind, carried by [`Tag::MetadataBlock`] and [`TagEnd::MetadataBlock`].
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum MetadataBlockKind {
    /// YAML-style metadata block; see [`Options::ENABLE_YAML_STYLE_METADATA_BLOCKS`].
    YamlStyle,
    /// Pluses-delimited metadata block; see
    /// [`Options::ENABLE_PLUSES_DELIMITED_METADATA_BLOCKS`].
    PlusesStyle,
}
176
/// Tags for elements that can contain other elements.
///
/// A tag is delivered inside [`Event::Start`]; the matching [`Event::End`] carries the
/// [`TagEnd`] returned by [`Tag::to_end`].
#[derive(Clone, Debug, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum Tag<'a> {
    /// A paragraph of text and other inline elements.
    Paragraph,

    /// A heading, with optional identifier, classes and custom attributes.
    /// The identifier is prefixed with `#` and the last one in the attributes
    /// list is chosen, classes are prefixed with `.` and custom attributes
    /// have no prefix and can optionally have a value (`myattr` or `myattr=myvalue`).
    ///
    /// `id`, `classes` and `attrs` are only parsed and populated with [`Options::ENABLE_HEADING_ATTRIBUTES`], `None` or empty otherwise.
    Heading {
        /// The heading level (`H1` to `H6`).
        level: HeadingLevel,
        /// The identifier, if any.
        id: Option<CowStr<'a>>,
        /// The classes, without their `.` prefix marker documented above.
        classes: Vec<CowStr<'a>>,
        /// The first item of the tuple is the attr and second one the value.
        attrs: Vec<(CowStr<'a>, Option<CowStr<'a>>)>,
    },

    /// A block quote.
    ///
    /// The `BlockQuoteKind` is only parsed & populated with [`Options::ENABLE_GFM`], `None` otherwise.
    ///
    /// ```markdown
    /// > regular quote
    ///
    /// > [!NOTE]
    /// > note quote
    /// ```
    BlockQuote(Option<BlockQuoteKind>),
    /// A code block.
    CodeBlock(CodeBlockKind<'a>),
    /// A container block, with its [`ContainerKind`] and an associated string.
    ///
    // NOTE(review): presumably only emitted with `Options::ENABLE_CONTAINER_EXTENSIONS`,
    // and the string is presumably the text that follows the opening delimiter.
    // Confirm both against the parser before documenting them as fact.
    ContainerBlock(ContainerKind, CowStr<'a>),

    /// An HTML block.
    ///
    /// A line that begins with some predefined tags (HTML block tags) (see [CommonMark Spec](https://spec.commonmark.org/0.31.2/#html-blocks) for more details) or any tag that is followed only by whitespace.
    ///
    /// Most HTML blocks end on an empty line, though some e.g. `<pre>` like `<script>` or `<!-- Comments -->` don't.
    /// ```markdown
    /// <body> Is HTML block even though here is non-whitespace.
    /// Block ends on an empty line.
    ///
    /// <some-random-tag>
    /// This is HTML block.
    ///
    /// <pre> Doesn't end on empty lines.
    ///
    /// This is still the same block.</pre>
    /// ```
    HtmlBlock,

    /// A list. If the list is ordered the first field indicates the number of the first item.
    /// The second field is `true` when the list is tight (no blank lines between items).
    /// Contains only list items.
    List(Option<u64>, bool),
    /// A list item.
    Item,
    /// A footnote definition. The value contained is the footnote's label by which it can
    /// be referred to.
    ///
    /// Only parsed and emitted with [`Options::ENABLE_FOOTNOTES`] or [`Options::ENABLE_OLD_FOOTNOTES`].
    #[cfg_attr(feature = "serde", serde(borrow))]
    FootnoteDefinition(CowStr<'a>),

    /// A definition list.
    /// Only parsed and emitted with [`Options::ENABLE_DEFINITION_LIST`].
    DefinitionList,
    /// The title (term) of a definition list entry.
    /// Only parsed and emitted with [`Options::ENABLE_DEFINITION_LIST`].
    DefinitionListTitle,
    /// A definition belonging to a definition list title.
    /// Only parsed and emitted with [`Options::ENABLE_DEFINITION_LIST`].
    DefinitionListDefinition,

    /// A table. Contains a vector describing the text-alignment for each of its columns.
    /// Only parsed and emitted with [`Options::ENABLE_TABLES`].
    Table(Vec<Alignment>),
    /// A table header. Contains only `TableCell`s. Note that the table body starts immediately
    /// after the closure of the `TableHead` tag. There is no `TableBody` tag.
    /// Only parsed and emitted with [`Options::ENABLE_TABLES`].
    TableHead,
    /// A table row. Used for both header rows and body rows. Contains only `TableCell`s.
    /// Only parsed and emitted with [`Options::ENABLE_TABLES`].
    TableRow,
    /// A table cell.
    /// Only parsed and emitted with [`Options::ENABLE_TABLES`].
    TableCell,

    // span-level tags
    /// [Emphasis](https://spec.commonmark.org/0.31.2/#emphasis-and-strong-emphasis).
    /// ```markdown
    /// half*emph* _strong_ _multi _level__
    /// ```
    Emphasis,
    /// [Strong emphasis](https://spec.commonmark.org/0.31.2/#emphasis-and-strong-emphasis).
    /// ```markdown
    /// half**strong** __strong__ __multi __level____
    /// ```
    Strong,
    /// Strikethrough text.
    /// Only parsed and emitted with [`Options::ENABLE_STRIKETHROUGH`].
    ///
    /// ```markdown
    /// ~strike through~
    /// ```
    Strikethrough,
    /// Superscript text.
    /// Only parsed and emitted with [`Options::ENABLE_SUPERSCRIPT`].
    ///
    /// ```markdown
    /// ^superscript^
    /// ```
    Superscript,
    /// Subscript text.
    /// Only parsed and emitted with [`Options::ENABLE_SUBSCRIPT`], if disabled `~something~` is parsed as [`Strikethrough`](Self::Strikethrough).
    /// ```markdown
    /// ~subscript~ ~~if also enabled this is strikethrough~~
    /// ```
    Subscript,

    /// A link.
    Link {
        /// The syntactic form the link was written in.
        link_type: LinkType,
        /// The destination URL.
        dest_url: CowStr<'a>,
        /// The link title.
        title: CowStr<'a>,
        /// Identifier of reference links, e.g. `world` in the link `[hello][world]`.
        id: CowStr<'a>,
    },

    /// An image. The first field is the link type, the second the destination URL and the third is a title,
    /// the fourth is the link identifier.
    Image {
        /// The syntactic form the image link was written in.
        link_type: LinkType,
        /// The destination URL.
        dest_url: CowStr<'a>,
        /// The image title.
        title: CowStr<'a>,
        /// Identifier of reference links, e.g. `world` in the link `[hello][world]`.
        id: CowStr<'a>,
    },

    /// A metadata block.
    /// Only parsed and emitted with [`Options::ENABLE_YAML_STYLE_METADATA_BLOCKS`]
    /// or [`Options::ENABLE_PLUSES_DELIMITED_METADATA_BLOCKS`].
    MetadataBlock(MetadataBlockKind),

    /// An MDX JSX element (flow-level, i.e. block).
    /// Only parsed and emitted with [`Options::ENABLE_MDX`].
    /// The `CowStr` is the raw JSX tag content (e.g. `Component x={1}`).
    /// ```mdx
    /// <Component x={1}>
    /// children
    /// </Component>
    /// ```
    #[cfg_attr(feature = "serde", serde(borrow))]
    MdxJsxFlowElement(CowStr<'a>),

    /// An MDX JSX element (text-level, i.e. inline).
    /// Only parsed and emitted with [`Options::ENABLE_MDX`].
    #[cfg_attr(feature = "serde", serde(borrow))]
    MdxJsxTextElement(CowStr<'a>),
}
333
334impl<'a> Tag<'a> {
335 pub fn to_end(&self) -> TagEnd {
336 match self {
337 Tag::Paragraph => TagEnd::Paragraph,
338 Tag::Heading { level, .. } => TagEnd::Heading(*level),
339 Tag::BlockQuote(kind) => TagEnd::BlockQuote(*kind),
340 Tag::CodeBlock(_) => TagEnd::CodeBlock,
341 Tag::ContainerBlock(kind, _) => TagEnd::ContainerBlock(*kind),
342 Tag::HtmlBlock => TagEnd::HtmlBlock,
343 Tag::List(number, _) => TagEnd::List(number.is_some()),
344 Tag::Item => TagEnd::Item,
345 Tag::FootnoteDefinition(_) => TagEnd::FootnoteDefinition,
346 Tag::Table(_) => TagEnd::Table,
347 Tag::TableHead => TagEnd::TableHead,
348 Tag::TableRow => TagEnd::TableRow,
349 Tag::TableCell => TagEnd::TableCell,
350 Tag::Subscript => TagEnd::Subscript,
351 Tag::Superscript => TagEnd::Superscript,
352 Tag::Emphasis => TagEnd::Emphasis,
353 Tag::Strong => TagEnd::Strong,
354 Tag::Strikethrough => TagEnd::Strikethrough,
355 Tag::Link { .. } => TagEnd::Link,
356 Tag::Image { .. } => TagEnd::Image,
357 Tag::MetadataBlock(kind) => TagEnd::MetadataBlock(*kind),
358 Tag::DefinitionList => TagEnd::DefinitionList,
359 Tag::DefinitionListTitle => TagEnd::DefinitionListTitle,
360 Tag::DefinitionListDefinition => TagEnd::DefinitionListDefinition,
361 Tag::MdxJsxFlowElement(_) => TagEnd::MdxJsxFlowElement,
362 Tag::MdxJsxTextElement(_) => TagEnd::MdxJsxTextElement,
363 }
364 }
365
366 pub fn into_static(self) -> Tag<'static> {
367 match self {
368 Tag::Paragraph => Tag::Paragraph,
369 Tag::Heading {
370 level,
371 id,
372 classes,
373 attrs,
374 } => Tag::Heading {
375 level,
376 id: id.map(|s| s.into_static()),
377 classes: classes.into_iter().map(|s| s.into_static()).collect(),
378 attrs: attrs
379 .into_iter()
380 .map(|(k, v)| (k.into_static(), v.map(|s| s.into_static())))
381 .collect(),
382 },
383 Tag::BlockQuote(k) => Tag::BlockQuote(k),
384 Tag::CodeBlock(kb) => Tag::CodeBlock(kb.into_static()),
385 Tag::ContainerBlock(k, s) => Tag::ContainerBlock(k, s.into_static()),
386 Tag::HtmlBlock => Tag::HtmlBlock,
387 Tag::List(v, t) => Tag::List(v, t),
388 Tag::Item => Tag::Item,
389 Tag::FootnoteDefinition(a) => Tag::FootnoteDefinition(a.into_static()),
390 Tag::Table(v) => Tag::Table(v),
391 Tag::TableHead => Tag::TableHead,
392 Tag::TableRow => Tag::TableRow,
393 Tag::TableCell => Tag::TableCell,
394 Tag::Emphasis => Tag::Emphasis,
395 Tag::Strong => Tag::Strong,
396 Tag::Strikethrough => Tag::Strikethrough,
397 Tag::Superscript => Tag::Superscript,
398 Tag::Subscript => Tag::Subscript,
399 Tag::Link {
400 link_type,
401 dest_url,
402 title,
403 id,
404 } => Tag::Link {
405 link_type,
406 dest_url: dest_url.into_static(),
407 title: title.into_static(),
408 id: id.into_static(),
409 },
410 Tag::Image {
411 link_type,
412 dest_url,
413 title,
414 id,
415 } => Tag::Image {
416 link_type,
417 dest_url: dest_url.into_static(),
418 title: title.into_static(),
419 id: id.into_static(),
420 },
421 Tag::MetadataBlock(v) => Tag::MetadataBlock(v),
422 Tag::DefinitionList => Tag::DefinitionList,
423 Tag::DefinitionListTitle => Tag::DefinitionListTitle,
424 Tag::DefinitionListDefinition => Tag::DefinitionListDefinition,
425 Tag::MdxJsxFlowElement(s) => Tag::MdxJsxFlowElement(s.into_static()),
426 Tag::MdxJsxTextElement(s) => Tag::MdxJsxTextElement(s.into_static()),
427 }
428 }
429}
430
/// The end of a `Tag`.
///
/// Delivered inside [`Event::End`]. It mirrors the variants of [`Tag`] but keeps only
/// small `Copy` payloads; [`Tag::to_end`] produces the value matching a start tag.
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum TagEnd {
    /// End of a paragraph.
    Paragraph,
    /// End of a heading of the given level.
    Heading(HeadingLevel),

    /// End of a block quote, with the same kind as its start tag.
    BlockQuote(Option<BlockQuoteKind>),
    /// End of a code block.
    CodeBlock,
    /// End of a container block of the given kind.
    ContainerBlock(ContainerKind),

    /// End of an HTML block.
    HtmlBlock,

    /// A list, `true` for ordered lists.
    List(bool),
    /// End of a list item.
    Item,
    /// End of a footnote definition.
    FootnoteDefinition,

    /// End of a definition list.
    DefinitionList,
    /// End of a definition list title.
    DefinitionListTitle,
    /// End of a definition list definition.
    DefinitionListDefinition,

    /// End of a table.
    Table,
    /// End of a table header.
    TableHead,
    /// End of a table row.
    TableRow,
    /// End of a table cell.
    TableCell,

    /// End of emphasis.
    Emphasis,
    /// End of strong emphasis.
    Strong,
    /// End of strikethrough.
    Strikethrough,
    /// End of superscript.
    Superscript,
    /// End of subscript.
    Subscript,

    /// End of a link.
    Link,
    /// End of an image.
    Image,

    /// End of a metadata block of the given kind.
    MetadataBlock(MetadataBlockKind),

    /// End of a flow-level (block) MDX JSX element.
    MdxJsxFlowElement,
    /// End of a text-level (inline) MDX JSX element.
    MdxJsxTextElement,
}
472
/// Compile-time check that `TagEnd` is exactly two bytes in size.
///
/// The declared type `[(); 2]` only accepts an initializer whose length is
/// `size_of::<TagEnd>()` when the two lengths are equal, so any other size
/// (smaller or larger) fails to compile on 64-bit targets.
/// This small size is why `TagEnd` is used instead of just using `Tag`.
#[cfg(target_pointer_width = "64")]
const _STATIC_ASSERT_TAG_END_SIZE: [(); 2] = [(); core::mem::size_of::<TagEnd>()];
477
478impl<'a> From<Tag<'a>> for TagEnd {
479 fn from(value: Tag) -> Self {
480 value.to_end()
481 }
482}
483
/// The level of a heading, from `H1` to `H6`.
///
/// Discriminants start at 1, so each variant's numeric value equals its level.
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum HeadingLevel {
    /// Level 1 heading.
    H1 = 1,
    /// Level 2 heading.
    H2,
    /// Level 3 heading.
    H3,
    /// Level 4 heading.
    H4,
    /// Level 5 heading.
    H5,
    /// Level 6 heading.
    H6,
}
494
495impl Display for HeadingLevel {
496 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
497 match self {
498 Self::H1 => write!(f, "h1"),
499 Self::H2 => write!(f, "h2"),
500 Self::H3 => write!(f, "h3"),
501 Self::H4 => write!(f, "h4"),
502 Self::H5 => write!(f, "h5"),
503 Self::H6 => write!(f, "h6"),
504 }
505 }
506}
507
/// Returned when trying to convert a `usize` into a [`HeadingLevel`] but it fails
/// because the `usize` isn't a valid heading level (1 through 6).
///
/// The rejected value is kept so that it can be reported through the `Debug` and
/// `Display` output.
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)]
pub struct InvalidHeadingLevel(usize);

impl core::fmt::Display for InvalidHeadingLevel {
    /// Writes a human-readable message that names the rejected value.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        write!(
            f,
            "invalid heading level {}: expected a value from 1 to 6",
            self.0
        )
    }
}
512
513impl TryFrom<usize> for HeadingLevel {
514 type Error = InvalidHeadingLevel;
515
516 fn try_from(value: usize) -> Result<Self, Self::Error> {
517 match value {
518 1 => Ok(Self::H1),
519 2 => Ok(Self::H2),
520 3 => Ok(Self::H3),
521 4 => Ok(Self::H4),
522 5 => Ok(Self::H5),
523 6 => Ok(Self::H6),
524 _ => Err(InvalidHeadingLevel(value)),
525 }
526 }
527}
528
/// Type specifier for links and images: the syntactic form in which the link was
/// written. It is the `link_type` field of [`Tag::Link`] and [`Tag::Image`].
#[derive(Clone, Debug, PartialEq, Copy)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum LinkType {
    /// Inline link like `[foo](bar)`
    Inline,
    /// Reference link like `[foo][bar]`
    Reference,
    /// Reference without destination in the document, but resolved by the broken_link_callback
    ReferenceUnknown,
    /// Collapsed link like `[foo][]`
    Collapsed,
    /// Collapsed link without destination in the document, but resolved by the broken_link_callback
    CollapsedUnknown,
    /// Shortcut link like `[foo]`
    Shortcut,
    /// Shortcut without destination in the document, but resolved by the broken_link_callback
    ShortcutUnknown,
    /// Autolink like `<http://foo.bar/baz>`
    Autolink,
    /// Email address in autolink like `<john@example.org>`
    Email,
    /// Wikilink link like `[[foo]]` or `[[foo|bar]]`
    WikiLink {
        /// `true` if the wikilink was piped.
        ///
        /// * `true` - `[[foo|bar]]`
        /// * `false` - `[[foo]]`
        has_pothole: bool,
    },
}
560
561impl LinkType {
562 /// Map the link type to an equivalent _Unknown link type.
563 fn to_unknown(self) -> Self {
564 match self {
565 LinkType::Reference => LinkType::ReferenceUnknown,
566 LinkType::Collapsed => LinkType::CollapsedUnknown,
567 LinkType::Shortcut => LinkType::ShortcutUnknown,
568 _ => unreachable!(),
569 }
570 }
571}
572
/// Markdown events that are generated in a preorder traversal of the document
/// tree, with additional `End` events whenever all of an inner node's children
/// have been visited.
#[derive(Clone, Debug, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum Event<'a> {
    /// Start of a tagged element. Events that are yielded after this event
    /// and before its corresponding `End` event are inside this element.
    /// Start and end events are guaranteed to be balanced.
    #[cfg_attr(feature = "serde", serde(borrow))]
    Start(Tag<'a>),
    /// End of a tagged element.
    End(TagEnd),
    /// A text node.
    ///
    /// All text, outside and inside [`Tag`]s.
    #[cfg_attr(feature = "serde", serde(borrow))]
    Text(CowStr<'a>),
    /// An [inline code node](https://spec.commonmark.org/0.31.2/#code-spans).
    ///
    /// ```markdown
    /// `code`
    /// ```
    #[cfg_attr(feature = "serde", serde(borrow))]
    Code(CowStr<'a>),
    /// An inline math environment node.
    /// Requires [`Options::ENABLE_MATH`].
    ///
    /// ```markdown
    /// $math$
    /// ```
    #[cfg_attr(feature = "serde", serde(borrow))]
    InlineMath(CowStr<'a>),
    /// A display math environment node.
    /// Requires [`Options::ENABLE_MATH`].
    ///
    /// ```markdown
    /// $$math$$
    /// ```
    #[cfg_attr(feature = "serde", serde(borrow))]
    DisplayMath(CowStr<'a>),
    /// An HTML node.
    ///
    /// A line of HTML inside [`Tag::HtmlBlock`] includes the line break.
    #[cfg_attr(feature = "serde", serde(borrow))]
    Html(CowStr<'a>),
    /// An [inline HTML node](https://spec.commonmark.org/0.31.2/#raw-html).
    ///
    /// Contains only the tag itself, e.g. `<open-tag>`, `</close-tag>` or `<!-- comment -->`.
    ///
    /// **Note**: Under some conditions HTML can also be parsed as an HTML Block, see [`Tag::HtmlBlock`] for details.
    #[cfg_attr(feature = "serde", serde(borrow))]
    InlineHtml(CowStr<'a>),
    /// A reference to a footnote with given label, defined
    /// by an event with a [`Tag::FootnoteDefinition`] tag. Definitions and references to them may
    /// occur in any order. Only parsed and emitted with [`Options::ENABLE_FOOTNOTES`] or [`Options::ENABLE_OLD_FOOTNOTES`].
    ///
    /// ```markdown
    /// [^1]
    /// ```
    #[cfg_attr(feature = "serde", serde(borrow))]
    FootnoteReference(CowStr<'a>),
    /// A [soft line break](https://spec.commonmark.org/0.31.2/#soft-line-breaks).
    ///
    /// Any line break that isn't a [`HardBreak`](Self::HardBreak), or the end of e.g. a paragraph.
    SoftBreak,
    /// A [hard line break](https://spec.commonmark.org/0.31.2/#hard-line-breaks).
    ///
    /// A line ending that is either preceded by at least two spaces or `\`.
    ///
    /// ```markdown
    /// hard··
    /// line\
    /// breaks
    /// ```
    /// *`·` is a space*
    HardBreak,
    /// A horizontal ruler.
    ///
    /// ```markdown
    /// ***
    /// ···---
    /// _·_··_····_··
    /// ```
    /// *`·` is any whitespace*
    Rule,
    /// A task list marker, rendered as a checkbox in HTML. Contains a true when it is checked.
    /// Only parsed and emitted with [`Options::ENABLE_TASKLISTS`].
    /// ```markdown
    /// - [ ] unchecked
    /// - [x] checked
    /// ```
    TaskListMarker(bool),

    /// An MDX flow expression (block-level).
    /// Only parsed and emitted with [`Options::ENABLE_MDX`].
    /// ```mdx
    /// {1 + 1}
    /// ```
    #[cfg_attr(feature = "serde", serde(borrow))]
    MdxFlowExpression(CowStr<'a>),

    /// An MDX text expression (inline).
    /// Only parsed and emitted with [`Options::ENABLE_MDX`].
    /// ```mdx
    /// an {expression} here
    /// ```
    #[cfg_attr(feature = "serde", serde(borrow))]
    MdxTextExpression(CowStr<'a>),

    /// An MDX ESM block (import/export at document level).
    /// Only parsed and emitted with [`Options::ENABLE_MDX`].
    /// ```mdx
    /// import {Chart} from './chart.js'
    /// export const meta = {}
    /// ```
    #[cfg_attr(feature = "serde", serde(borrow))]
    MdxEsm(CowStr<'a>),
}
692
693impl<'a> Event<'a> {
694 pub fn into_static(self) -> Event<'static> {
695 match self {
696 Event::Start(t) => Event::Start(t.into_static()),
697 Event::End(e) => Event::End(e),
698 Event::Text(s) => Event::Text(s.into_static()),
699 Event::Code(s) => Event::Code(s.into_static()),
700 Event::InlineMath(s) => Event::InlineMath(s.into_static()),
701 Event::DisplayMath(s) => Event::DisplayMath(s.into_static()),
702 Event::Html(s) => Event::Html(s.into_static()),
703 Event::InlineHtml(s) => Event::InlineHtml(s.into_static()),
704 Event::FootnoteReference(s) => Event::FootnoteReference(s.into_static()),
705 Event::SoftBreak => Event::SoftBreak,
706 Event::HardBreak => Event::HardBreak,
707 Event::Rule => Event::Rule,
708 Event::TaskListMarker(b) => Event::TaskListMarker(b),
709 Event::MdxFlowExpression(s) => Event::MdxFlowExpression(s.into_static()),
710 Event::MdxTextExpression(s) => Event::MdxTextExpression(s.into_static()),
711 Event::MdxEsm(s) => Event::MdxEsm(s.into_static()),
712 }
713 }
714}
715
/// Table column text alignment.
///
/// [`Tag::Table`] carries one value per column.
#[derive(Copy, Clone, Debug, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum Alignment {
    /// Default text alignment.
    None,
    /// Left-aligned column.
    Left,
    /// Centered column.
    Center,
    /// Right-aligned column.
    Right,
}
726
bitflags::bitflags! {
    /// Option struct containing flags for enabling extra features
    /// that are not part of the CommonMark spec.
    #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
    pub struct Options: u32 {
        /// Enables tables ([`Tag::Table`] together with `TableHead`, `TableRow`
        /// and `TableCell`).
        const ENABLE_TABLES = 1 << 1;
        /// GitHub-compatible footnote syntax.
        ///
        /// Footnotes are referenced with the syntax `[^IDENT]`,
        /// and defined with an identifier followed by a colon at top level.
        ///
        /// ---
        ///
        /// ```markdown
        /// Footnote referenced [^1].
        ///
        /// [^1]: footnote defined
        /// ```
        ///
        /// Footnote referenced [^1].
        ///
        /// [^1]: footnote defined
        const ENABLE_FOOTNOTES = 1 << 2;
        /// Enables strikethrough ([`Tag::Strikethrough`]).
        const ENABLE_STRIKETHROUGH = 1 << 3;
        /// Enables task list markers ([`Event::TaskListMarker`]).
        const ENABLE_TASKLISTS = 1 << 4;
        /// Enables replacement of ASCII punctuation characters with
        /// Unicode ligatures and smart quotes.
        ///
        /// This includes replacing `--` with `—`, `---` with `—`, `...` with `…`,
        /// `"quote"` with `“quote”`, and `'quote'` with `‘quote’`.
        ///
        /// The replacement takes place during the parsing of the document.
        // NOTE(review): both dash forms above are shown mapping to the same character;
        // confirm against the parser whether `--` should read `–` (en dash) instead.
        const ENABLE_SMART_PUNCTUATION = 1 << 5;
        /// Extension to allow headings to have ID and classes.
        ///
        /// `# text { #id .class1 .class2 myattr other_attr=myvalue }`
        /// is interpreted as a level 1 heading
        /// with the content `text`, ID `id`, classes `class1` and `class2` and
        /// custom attributes `myattr` (without value) and
        /// `other_attr` with value `myvalue`.
        /// Note that ID, classes, and custom attributes should be space-separated.
        const ENABLE_HEADING_ATTRIBUTES = 1 << 6;
        /// Metadata blocks in YAML style, i.e.:
        /// - starting with a `---` line
        /// - ending with a `---` or `...` line
        const ENABLE_YAML_STYLE_METADATA_BLOCKS = 1 << 7;
        /// Metadata blocks delimited by:
        /// - `+++` line at start
        /// - `+++` line at end
        const ENABLE_PLUSES_DELIMITED_METADATA_BLOCKS = 1 << 8;
        /// Older footnote syntax. This flag implies `ENABLE_FOOTNOTES`, changing it to use an
        /// older syntax instead of the new, default, GitHub-compatible syntax.
        ///
        /// New syntax is different from the old syntax regarding
        /// indentation, nesting, and footnote references with no definition:
        ///
        /// ```markdown
        /// [^1]: In new syntax, this is two footnote definitions.
        /// [^2]: In old syntax, this is a single footnote definition with two lines.
        ///
        /// [^3]:
        ///
        ///     In new syntax, this is a footnote with two paragraphs.
        ///
        ///     In old syntax, this is a footnote followed by a code block.
        ///
        /// In new syntax, this undefined footnote definition renders as
        /// literal text [^4]. In old syntax, it creates a dangling link.
        /// ```
        // The value includes the `ENABLE_FOOTNOTES` bit (1 << 2), which is how this
        // flag implies that one.
        const ENABLE_OLD_FOOTNOTES = (1 << 9) | (1 << 2);
        /// With this feature enabled, two events `Event::InlineMath` and `Event::DisplayMath`
        /// are emitted that conventionally contain TeX formulas.
        const ENABLE_MATH = 1 << 10;
        /// Misc GitHub Flavored Markdown features not supported in CommonMark.
        /// The following features are currently behind this tag:
        /// - Blockquote tags ([!NOTE], [!TIP], [!IMPORTANT], [!WARNING], [!CAUTION]).
        const ENABLE_GFM = 1 << 11;
        /// Commonmark-HS-Extensions compatible definition lists.
        ///
        /// ```markdown
        /// title 1
        /// : definition 1
        ///
        /// title 2
        /// : definition 2a
        /// : definition 2b
        /// ```
        const ENABLE_DEFINITION_LIST = 1 << 12;
        /// Enables superscript ([`Tag::Superscript`]).
        const ENABLE_SUPERSCRIPT = 1 << 13;
        /// Enables subscript ([`Tag::Subscript`]).
        const ENABLE_SUBSCRIPT = 1 << 14;
        /// Obsidian-style Wikilinks.
        const ENABLE_WIKILINKS = 1 << 15;
        /// Colon-delimited Container Extension Blocks.
        const ENABLE_CONTAINER_EXTENSIONS = 1 << 16;
        /// MDX: enables JSX elements, expressions, and ESM import/export.
        const ENABLE_MDX = 1 << 17;
    }
}
825
826impl Options {
827 pub(crate) fn has_gfm_footnotes(&self) -> bool {
828 self.contains(Options::ENABLE_FOOTNOTES) && !self.contains(Options::ENABLE_OLD_FOOTNOTES)
829 }
830}