satteri_pulldown_cmark/lib.rs
1// Copyright 2015 Google Inc. All rights reserved.
2//
3// Permission is hereby granted, free of charge, to any person obtaining a copy
4// of this software and associated documentation files (the "Software"), to deal
5// in the Software without restriction, including without limitation the rights
6// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7// copies of the Software, and to permit persons to whom the Software is
8// furnished to do so, subject to the following conditions:
9//
10// The above copyright notice and this permission notice shall be included in
11// all copies or substantial portions of the Software.
12//
13// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19// THE SOFTWARE.
20
21//! Pull parser for [CommonMark](https://commonmark.org). This crate provides a [Parser](struct.Parser.html) struct
22//! which is an iterator over [Event](enum.Event.html)s. This iterator can be used
23//! directly, or to build an arena representation via [`parse()`].
24//!
25//! By default, only CommonMark features are enabled. To use extensions like tables,
26//! footnotes or task lists, enable them by setting the corresponding flags in the
27//! [Options](struct.Options.html) struct.
28//!
29//! # Example
30//! ```rust
31//! use satteri_pulldown_cmark::{parse, Options};
32//!
33//! let markdown_input = "Hello world, this is a ~~complicated~~ *very simple* example.";
34//!
35//! let mut options = Options::empty();
36//! options.insert(Options::ENABLE_STRIKETHROUGH);
37//! let (arena, _) = parse(markdown_input, options);
38//! let html = satteri_ast::mdast_to_html(&arena);
39//!
40//! let expected_html = "<p>Hello world, this is a <del>complicated</del> <em>very simple</em> example.</p>\n";
41//! assert_eq!(expected_html, &html);
42//! ```
43//!
//! Note that the parser may emit several consecutive text events for what reads as a
//! single run of text; this is a consequence of how it evaluates the source. The
//! `TextMergeStream` utility exists to make iterating over such events more comfortable:
47//!
48//! ```rust
49//! use satteri_pulldown_cmark::{Event, Parser, TextMergeStream};
50//!
51//! let markdown_input = "Hello world, this is a ~~complicated~~ *very simple* example.";
52//!
53//! let iterator = TextMergeStream::new(Parser::new(markdown_input));
54//!
55//! for event in iterator {
56//! match event {
57//! Event::Text(text) => println!("{}", text),
58//! _ => {}
59//! }
60//! }
61//! ```
62//!
63#![warn(
64 clippy::alloc_instead_of_core,
65 clippy::std_instead_of_alloc,
66 clippy::std_instead_of_core
67)]
68// Forbid unsafe code unless the SIMD feature is enabled.
69#![cfg_attr(not(feature = "simd"), forbid(unsafe_code))]
70#![warn(missing_debug_implementations)]
71#![cfg_attr(not(feature = "std"), no_std)]
72
73#[macro_use]
74extern crate alloc;
75
76#[cfg(feature = "std")]
77extern crate std;
78
79#[cfg(not(feature = "std"))]
80compile_error!("This crate requires the \"std\" feature.");
81
82use alloc::vec::Vec;
83
84#[cfg(feature = "serde")]
85use serde::{Deserialize, Serialize};
86
87pub mod utils;
88
89pub mod arena_build;
90mod entities;
91mod firstpass;
92mod linklabel;
93#[cfg(feature = "mdx")]
94mod mdx;
95mod parse;
96pub(crate) mod post_passes;
97mod puncttable;
98mod scanners;
99mod strings;
100mod tree;
101
102use core::fmt::Display;
103
104#[cfg(feature = "mdx")]
105pub use crate::arena_build::MDX_OPTIONS;
106pub use crate::{
107 arena_build::{parse, DEFAULT_OPTIONS},
108 parse::{
109 BrokenLink, BrokenLinkCallback, DefaultParserCallbacks, OffsetIter, Parser,
110 ParserCallbacks, RefDefs,
111 },
112 strings::{CowStr, InlineStr},
113 utils::*,
114};
115
/// Codeblock kind.
///
/// Carried by [`Tag::CodeBlock`].
#[derive(Clone, Debug, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum CodeBlockKind<'a> {
    /// An indented code block; unlike [`Fenced`](Self::Fenced) it carries no language tag.
    Indented,
    /// The value contained in the tag describes the language of the code, which may be empty.
    #[cfg_attr(feature = "serde", serde(borrow))]
    Fenced(CowStr<'a>),
}
125
126impl<'a> CodeBlockKind<'a> {
127 pub fn is_indented(&self) -> bool {
128 matches!(*self, CodeBlockKind::Indented)
129 }
130
131 pub fn is_fenced(&self) -> bool {
132 matches!(*self, CodeBlockKind::Fenced(_))
133 }
134
135 pub fn into_static(self) -> CodeBlockKind<'static> {
136 match self {
137 CodeBlockKind::Indented => CodeBlockKind::Indented,
138 CodeBlockKind::Fenced(s) => CodeBlockKind::Fenced(s.into_static()),
139 }
140 }
141}
142
/// BlockQuote kind (Note, Tip, Important, Warning, Caution).
///
/// Carried as the optional payload of [`Tag::BlockQuote`] and [`TagEnd::BlockQuote`].
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum BlockQuoteKind {
    /// A `[!NOTE]` block quote.
    Note,
    /// A `[!TIP]` block quote.
    Tip,
    /// A `[!IMPORTANT]` block quote.
    Important,
    /// A `[!WARNING]` block quote.
    Warning,
    /// A `[!CAUTION]` block quote.
    Caution,
}
153
/// Directive kind.
///
/// Carried by [`Tag::Directive`] and [`TagEnd::Directive`].
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum DirectiveKind {
    /// A container directive (`:::`).
    Container,
    /// A leaf directive (`::`).
    Leaf,
    /// A text directive (`:`).
    Text,
}
162
/// Metadata block kind.
///
/// Carried by [`Tag::MetadataBlock`] and [`TagEnd::MetadataBlock`].
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum MetadataBlockKind {
    /// YAML-style block; see [`Options::ENABLE_YAML_STYLE_METADATA_BLOCKS`].
    YamlStyle,
    /// `+++`-delimited block; see [`Options::ENABLE_PLUSES_DELIMITED_METADATA_BLOCKS`].
    PlusesStyle,
}
170
/// Tags for elements that can contain other elements.
///
/// A tag is carried by [`Event::Start`]; the matching [`Event::End`] carries the
/// corresponding [`TagEnd`] (see [`Tag::to_end`]).
#[derive(Clone, Debug, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum Tag<'a> {
    /// A paragraph of text and other inline elements.
    Paragraph,

    /// A heading, with optional identifier, classes and custom attributes.
    /// The identifier is prefixed with `#` and the last one in the attributes
    /// list is chosen, classes are prefixed with `.` and custom attributes
    /// have no prefix and can optionally have a value (`myattr` or `myattr=myvalue`).
    ///
    /// `id`, `classes` and `attrs` are only parsed and populated with [`Options::ENABLE_HEADING_ATTRIBUTES`], `None` or empty otherwise.
    Heading {
        /// The heading level (`H1` to `H6`).
        level: HeadingLevel,
        /// The identifier, if any.
        id: Option<CowStr<'a>>,
        /// The classes, without the `.` prefix semantics described above being re-applied.
        classes: Vec<CowStr<'a>>,
        /// The first item of the tuple is the attr and second one the value.
        attrs: Vec<(CowStr<'a>, Option<CowStr<'a>>)>,
    },

    /// A block quote.
    ///
    /// The `BlockQuoteKind` is only parsed & populated with [`Options::ENABLE_GFM`], `None` otherwise.
    ///
    /// ```markdown
    /// > regular quote
    ///
    /// > [!NOTE]
    /// > note quote
    /// ```
    // NOTE(review): `Options::ENABLE_GITHUB_ALERTS` also exists and describes these alert
    // markers; confirm which flag(s) actually gate the `BlockQuoteKind` and update this doc.
    BlockQuote(Option<BlockQuoteKind>),
    /// A code block.
    CodeBlock(CodeBlockKind<'a>),
    /// A directive (container, leaf, or text).
    /// Only parsed and emitted with [`Options::ENABLE_DIRECTIVE`].
    Directive {
        /// Which of the three directive forms this is.
        kind: DirectiveKind,
        /// The directive's name.
        name: CowStr<'a>,
        /// The directive's attributes as pairs (presumably key then value; confirm against the parser).
        attributes: Vec<(CowStr<'a>, CowStr<'a>)>,
    },

    /// An HTML block.
    ///
    /// A line that begins with some predefined tags (HTML block tags) (see [CommonMark Spec](https://spec.commonmark.org/0.31.2/#html-blocks) for more details) or any tag that is followed only by whitespace.
    ///
    /// Most HTML blocks end on an empty line, though some (e.g. `<pre>`, `<script>` or `<!-- Comments -->`) don't.
    /// ```markdown
    /// <body> Is HTML block even though here is non-whitespace.
    /// Block ends on an empty line.
    ///
    /// <some-random-tag>
    /// This is HTML block.
    ///
    /// <pre> Doesn't end on empty lines.
    ///
    /// This is still the same block.</pre>
    /// ```
    HtmlBlock,

    /// A list. If the list is ordered the first field indicates the number of the first item.
    /// The second field is `true` when the list is tight (no blank lines between items).
    /// Contains only list items.
    List(Option<u64>, bool),
    /// A list item.
    Item,
    /// A footnote definition. The value contained is the footnote's label by which it can
    /// be referred to.
    ///
    /// Only parsed and emitted with [`Options::ENABLE_FOOTNOTES`].
    #[cfg_attr(feature = "serde", serde(borrow))]
    FootnoteDefinition(CowStr<'a>),

    /// A definition list.
    /// Only parsed and emitted with [`Options::ENABLE_DEFINITION_LIST`].
    DefinitionList,
    /// A title (term) inside a definition list.
    /// Only parsed and emitted with [`Options::ENABLE_DEFINITION_LIST`].
    DefinitionListTitle,
    /// A definition inside a definition list.
    /// Only parsed and emitted with [`Options::ENABLE_DEFINITION_LIST`].
    DefinitionListDefinition,

    /// A table. Contains a vector describing the text-alignment for each of its columns.
    /// Only parsed and emitted with [`Options::ENABLE_TABLES`].
    Table(Vec<Alignment>),
    /// A table header. Contains only `TableCell`s. Note that the table body starts immediately
    /// after the closure of the `TableHead` tag. There is no `TableBody` tag.
    /// Only parsed and emitted with [`Options::ENABLE_TABLES`].
    TableHead,
    /// A table row. Used for both header rows and body rows. Contains only `TableCell`s.
    /// Only parsed and emitted with [`Options::ENABLE_TABLES`].
    TableRow,
    /// A table cell.
    /// Only parsed and emitted with [`Options::ENABLE_TABLES`].
    TableCell,

    // span-level tags
    /// [Emphasis](https://spec.commonmark.org/0.31.2/#emphasis-and-strong-emphasis).
    /// ```markdown
    /// half*emph* _strong_ _multi _level__
    /// ```
    Emphasis,
    /// [Strong emphasis](https://spec.commonmark.org/0.31.2/#emphasis-and-strong-emphasis).
    /// ```markdown
    /// half**strong** __strong__ __multi __level____
    /// ```
    Strong,
    /// Only parsed and emitted with [`Options::ENABLE_STRIKETHROUGH`].
    ///
    /// ```markdown
    /// ~strike through~
    /// ```
    Strikethrough,
    /// Only parsed and emitted with [`Options::ENABLE_SUPERSCRIPT`].
    ///
    /// ```markdown
    /// ^superscript^
    /// ```
    Superscript,
    /// Only parsed and emitted with [`Options::ENABLE_SUBSCRIPT`], if disabled `~something~` is parsed as [`Strikethrough`](Self::Strikethrough).
    /// ```markdown
    /// ~subscript~ ~~if also enabled this is strikethrough~~
    /// ```
    Subscript,

    /// A link.
    Link {
        /// How the link was written in the source.
        link_type: LinkType,
        /// The destination URL.
        dest_url: CowStr<'a>,
        /// The link title.
        title: CowStr<'a>,
        /// Identifier of reference links, e.g. `world` in the link `[hello][world]`.
        id: CowStr<'a>,
    },

    /// An image. `link_type` is the link type, `dest_url` the destination URL, `title` the title
    /// and `id` the link identifier.
    Image {
        /// How the image link was written in the source.
        link_type: LinkType,
        /// The destination URL.
        dest_url: CowStr<'a>,
        /// The image title.
        title: CowStr<'a>,
        /// Identifier of reference links, e.g. `world` in the link `[hello][world]`.
        id: CowStr<'a>,
    },

    /// A metadata block.
    /// Only parsed and emitted with [`Options::ENABLE_YAML_STYLE_METADATA_BLOCKS`]
    /// or [`Options::ENABLE_PLUSES_DELIMITED_METADATA_BLOCKS`].
    MetadataBlock(MetadataBlockKind),

    /// An MDX JSX element (flow-level, i.e. block).
    /// Only parsed and emitted with [`Options::ENABLE_MDX`].
    /// The `CowStr` is the raw JSX tag content (e.g. `Component x={1}`).
    /// ```mdx
    /// <Component x={1}>
    ///   children
    /// </Component>
    /// ```
    #[cfg(feature = "mdx")]
    #[cfg_attr(feature = "serde", serde(borrow))]
    MdxJsxFlowElement(CowStr<'a>),

    /// An MDX JSX element (text-level, i.e. inline).
    /// Only parsed and emitted with [`Options::ENABLE_MDX`].
    #[cfg(feature = "mdx")]
    #[cfg_attr(feature = "serde", serde(borrow))]
    MdxJsxTextElement(CowStr<'a>),
}
335
336impl<'a> Tag<'a> {
337 pub fn to_end(&self) -> TagEnd {
338 match self {
339 Tag::Paragraph => TagEnd::Paragraph,
340 Tag::Heading { level, .. } => TagEnd::Heading(*level),
341 Tag::BlockQuote(kind) => TagEnd::BlockQuote(*kind),
342 Tag::CodeBlock(_) => TagEnd::CodeBlock,
343 Tag::Directive { kind, .. } => TagEnd::Directive(*kind),
344 Tag::HtmlBlock => TagEnd::HtmlBlock,
345 Tag::List(number, _) => TagEnd::List(number.is_some()),
346 Tag::Item => TagEnd::Item,
347 Tag::FootnoteDefinition(_) => TagEnd::FootnoteDefinition,
348 Tag::Table(_) => TagEnd::Table,
349 Tag::TableHead => TagEnd::TableHead,
350 Tag::TableRow => TagEnd::TableRow,
351 Tag::TableCell => TagEnd::TableCell,
352 Tag::Subscript => TagEnd::Subscript,
353 Tag::Superscript => TagEnd::Superscript,
354 Tag::Emphasis => TagEnd::Emphasis,
355 Tag::Strong => TagEnd::Strong,
356 Tag::Strikethrough => TagEnd::Strikethrough,
357 Tag::Link { .. } => TagEnd::Link,
358 Tag::Image { .. } => TagEnd::Image,
359 Tag::MetadataBlock(kind) => TagEnd::MetadataBlock(*kind),
360 Tag::DefinitionList => TagEnd::DefinitionList,
361 Tag::DefinitionListTitle => TagEnd::DefinitionListTitle,
362 Tag::DefinitionListDefinition => TagEnd::DefinitionListDefinition,
363 #[cfg(feature = "mdx")]
364 Tag::MdxJsxFlowElement(_) => TagEnd::MdxJsxFlowElement,
365 #[cfg(feature = "mdx")]
366 Tag::MdxJsxTextElement(_) => TagEnd::MdxJsxTextElement,
367 }
368 }
369
370 pub fn into_static(self) -> Tag<'static> {
371 match self {
372 Tag::Paragraph => Tag::Paragraph,
373 Tag::Heading {
374 level,
375 id,
376 classes,
377 attrs,
378 } => Tag::Heading {
379 level,
380 id: id.map(|s| s.into_static()),
381 classes: classes.into_iter().map(|s| s.into_static()).collect(),
382 attrs: attrs
383 .into_iter()
384 .map(|(k, v)| (k.into_static(), v.map(|s| s.into_static())))
385 .collect(),
386 },
387 Tag::BlockQuote(k) => Tag::BlockQuote(k),
388 Tag::CodeBlock(kb) => Tag::CodeBlock(kb.into_static()),
389 Tag::Directive {
390 kind,
391 name,
392 attributes,
393 } => Tag::Directive {
394 kind,
395 name: name.into_static(),
396 attributes: attributes
397 .into_iter()
398 .map(|(k, v)| (k.into_static(), v.into_static()))
399 .collect(),
400 },
401 Tag::HtmlBlock => Tag::HtmlBlock,
402 Tag::List(v, t) => Tag::List(v, t),
403 Tag::Item => Tag::Item,
404 Tag::FootnoteDefinition(a) => Tag::FootnoteDefinition(a.into_static()),
405 Tag::Table(v) => Tag::Table(v),
406 Tag::TableHead => Tag::TableHead,
407 Tag::TableRow => Tag::TableRow,
408 Tag::TableCell => Tag::TableCell,
409 Tag::Emphasis => Tag::Emphasis,
410 Tag::Strong => Tag::Strong,
411 Tag::Strikethrough => Tag::Strikethrough,
412 Tag::Superscript => Tag::Superscript,
413 Tag::Subscript => Tag::Subscript,
414 Tag::Link {
415 link_type,
416 dest_url,
417 title,
418 id,
419 } => Tag::Link {
420 link_type,
421 dest_url: dest_url.into_static(),
422 title: title.into_static(),
423 id: id.into_static(),
424 },
425 Tag::Image {
426 link_type,
427 dest_url,
428 title,
429 id,
430 } => Tag::Image {
431 link_type,
432 dest_url: dest_url.into_static(),
433 title: title.into_static(),
434 id: id.into_static(),
435 },
436 Tag::MetadataBlock(v) => Tag::MetadataBlock(v),
437 Tag::DefinitionList => Tag::DefinitionList,
438 Tag::DefinitionListTitle => Tag::DefinitionListTitle,
439 Tag::DefinitionListDefinition => Tag::DefinitionListDefinition,
440 #[cfg(feature = "mdx")]
441 Tag::MdxJsxFlowElement(s) => Tag::MdxJsxFlowElement(s.into_static()),
442 #[cfg(feature = "mdx")]
443 Tag::MdxJsxTextElement(s) => Tag::MdxJsxTextElement(s.into_static()),
444 }
445 }
446}
447
/// The end of a `Tag`.
///
/// Carried by [`Event::End`]. Each variant mirrors the [`Tag`] variant of the same
/// name but keeps only a small `Copy` payload (see [`Tag::to_end`]).
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum TagEnd {
    Paragraph,
    /// End of a heading; carries the heading's level.
    Heading(HeadingLevel),

    /// End of a block quote; carries the same optional kind as the opening tag.
    BlockQuote(Option<BlockQuoteKind>),
    CodeBlock,
    /// End of a directive; carries the directive kind.
    Directive(DirectiveKind),

    HtmlBlock,

    /// A list, `true` for ordered lists.
    List(bool),
    Item,
    FootnoteDefinition,

    DefinitionList,
    DefinitionListTitle,
    DefinitionListDefinition,

    Table,
    TableHead,
    TableRow,
    TableCell,

    Emphasis,
    Strong,
    Strikethrough,
    Superscript,
    Subscript,

    Link,
    Image,

    /// End of a metadata block; carries the metadata block kind.
    MetadataBlock(MetadataBlockKind),

    /// End of a flow-level MDX JSX element. Only present with the `mdx` feature.
    #[cfg(feature = "mdx")]
    MdxJsxFlowElement,
    /// End of a text-level MDX JSX element. Only present with the `mdx` feature.
    #[cfg(feature = "mdx")]
    MdxJsxTextElement,
}
491
/// Make sure `TagEnd` is no more than two bytes in size.
/// This is why it's used instead of just using `Tag`.
// Compile-time check: the array on the right has length `size_of::<TagEnd>()`, so this
// constant only type-checks while that size matches the declared length of 2.
// The check is only compiled on targets with 64-bit pointers.
#[cfg(target_pointer_width = "64")]
const _STATIC_ASSERT_TAG_END_SIZE: [(); 2] = [(); core::mem::size_of::<TagEnd>()];
496
497impl<'a> From<Tag<'a>> for TagEnd {
498 fn from(value: Tag) -> Self {
499 value.to_end()
500 }
501}
502
/// Heading level, `H1` through `H6`, with discriminants `1` through `6`.
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum HeadingLevel {
    H1 = 1,
    H2,
    H3,
    H4,
    H5,
    H6,
}

/// Formats the level as its lowercase tag name, `h1` through `h6`.
impl Display for HeadingLevel {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let tag_name = match self {
            Self::H1 => "h1",
            Self::H2 => "h2",
            Self::H3 => "h3",
            Self::H4 => "h4",
            Self::H5 => "h5",
            Self::H6 => "h6",
        };
        f.write_str(tag_name)
    }
}

/// Returned when trying to convert a `usize` into a `HeadingLevel` but it fails
/// because the `usize` isn't a valid heading level. Wraps the rejected value.
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)]
pub struct InvalidHeadingLevel(usize);

/// Accepts `1..=6`; any other value is rejected with [`InvalidHeadingLevel`].
impl TryFrom<usize> for HeadingLevel {
    type Error = InvalidHeadingLevel;

    fn try_from(value: usize) -> Result<Self, Self::Error> {
        // Levels in discriminant order, so level `n` lives at index `n - 1`.
        const LEVELS: [HeadingLevel; 6] = [
            HeadingLevel::H1,
            HeadingLevel::H2,
            HeadingLevel::H3,
            HeadingLevel::H4,
            HeadingLevel::H5,
            HeadingLevel::H6,
        ];

        value
            .checked_sub(1) // rejects 0 without underflowing
            .and_then(|index| LEVELS.get(index).copied())
            .ok_or(InvalidHeadingLevel(value))
    }
}
547
/// Type specifier for inline links. See [the Tag::Link](enum.Tag.html#variant.Link) for more information.
#[derive(Clone, Debug, PartialEq, Copy)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum LinkType {
    /// Inline link like `[foo](bar)`
    Inline,
    /// Reference link like `[foo][bar]`
    Reference,
    /// Reference without destination in the document, but resolved by the broken_link_callback
    ReferenceUnknown,
    /// Collapsed link like `[foo][]`
    Collapsed,
    /// Collapsed link without destination in the document, but resolved by the broken_link_callback
    CollapsedUnknown,
    /// Shortcut link like `[foo]`
    Shortcut,
    /// Shortcut without destination in the document, but resolved by the broken_link_callback
    ShortcutUnknown,
    /// Autolink like `<http://foo.bar/baz>`
    Autolink,
    /// Email address in autolink like `<john@example.org>`
    Email,
    /// Wikilink link like `[[foo]]` or `[[foo|bar]]`
    WikiLink {
        /// `true` if the wikilink was piped.
        ///
        /// * `true` - `[[foo|bar]]`
        /// * `false` - `[[foo]]`
        has_pothole: bool,
    },
}

impl LinkType {
    /// Maps `Reference`, `Collapsed` and `Shortcut` to their `*Unknown` counterpart.
    ///
    /// # Panics
    ///
    /// Panics for every other link type, since none of them has an `*Unknown` form.
    fn to_unknown(self) -> Self {
        match self {
            Self::Reference => Self::ReferenceUnknown,
            Self::Collapsed => Self::CollapsedUnknown,
            Self::Shortcut => Self::ShortcutUnknown,
            // Listed explicitly so that adding a variant forces a decision here.
            Self::Inline
            | Self::ReferenceUnknown
            | Self::CollapsedUnknown
            | Self::ShortcutUnknown
            | Self::Autolink
            | Self::Email
            | Self::WikiLink { .. } => unreachable!(),
        }
    }
}
591
/// Markdown events that are generated in a preorder traversal of the document
/// tree, with additional `End` events whenever all of an inner node's children
/// have been visited.
#[derive(Clone, Debug, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum Event<'a> {
    /// Start of a tagged element. Events that are yielded after this event
    /// and before its corresponding `End` event are inside this element.
    /// Start and end events are guaranteed to be balanced.
    #[cfg_attr(feature = "serde", serde(borrow))]
    Start(Tag<'a>),
    /// End of a tagged element.
    End(TagEnd),
    /// A text node.
    ///
    /// All text, outside and inside [`Tag`]s.
    #[cfg_attr(feature = "serde", serde(borrow))]
    Text(CowStr<'a>),
    /// An [inline code node](https://spec.commonmark.org/0.31.2/#code-spans).
    ///
    /// ```markdown
    /// `code`
    /// ```
    #[cfg_attr(feature = "serde", serde(borrow))]
    Code(CowStr<'a>),
    /// An inline math environment node.
    /// Requires [`Options::ENABLE_MATH`].
    ///
    /// ```markdown
    /// $math$
    /// ```
    #[cfg_attr(feature = "serde", serde(borrow))]
    InlineMath(CowStr<'a>),
    /// A display math environment node.
    /// Requires [`Options::ENABLE_MATH`].
    ///
    /// ```markdown
    /// $$math$$
    /// ```
    #[cfg_attr(feature = "serde", serde(borrow))]
    DisplayMath(CowStr<'a>),
    /// An HTML node.
    ///
    /// A line of HTML inside [`Tag::HtmlBlock`] includes the line break.
    #[cfg_attr(feature = "serde", serde(borrow))]
    Html(CowStr<'a>),
    /// An [inline HTML node](https://spec.commonmark.org/0.31.2/#raw-html).
    ///
    /// Contains only the tag itself, e.g. `<open-tag>`, `</close-tag>` or `<!-- comment -->`.
    ///
    /// **Note**: Under some conditions HTML can also be parsed as an HTML Block, see [`Tag::HtmlBlock`] for details.
    #[cfg_attr(feature = "serde", serde(borrow))]
    InlineHtml(CowStr<'a>),
    /// A reference to a footnote with given label, defined
    /// by an event with a [`Tag::FootnoteDefinition`] tag. Definitions and references to them may
    /// occur in any order. Only parsed and emitted with [`Options::ENABLE_FOOTNOTES`].
    ///
    /// ```markdown
    /// [^1]
    /// ```
    #[cfg_attr(feature = "serde", serde(borrow))]
    FootnoteReference(CowStr<'a>),
    /// A [soft line break](https://spec.commonmark.org/0.31.2/#soft-line-breaks).
    ///
    /// Any line break that isn't a [`HardBreak`](Self::HardBreak), or the end of e.g. a paragraph.
    SoftBreak,
    /// A [hard line break](https://spec.commonmark.org/0.31.2/#hard-line-breaks).
    ///
    /// A line ending that is either preceded by at least two spaces or `\`.
    ///
    /// ```markdown
    /// hard··
    /// line\
    /// breaks
    /// ```
    /// *`·` is a space*
    HardBreak,
    /// A horizontal ruler.
    ///
    /// ```markdown
    /// ***
    /// ···---
    /// _·_··_····_··
    /// ```
    /// *`·` is any whitespace*
    Rule,
    /// A task list marker, rendered as a checkbox in HTML. Contains a true when it is checked.
    /// Only parsed and emitted with [`Options::ENABLE_TASKLISTS`].
    /// ```markdown
    /// - [ ] unchecked
    /// - [x] checked
    /// ```
    TaskListMarker(bool),

    /// An MDX flow expression (block-level).
    /// Only parsed and emitted with [`Options::ENABLE_MDX`].
    /// ```mdx
    /// {1 + 1}
    /// ```
    #[cfg(feature = "mdx")]
    #[cfg_attr(feature = "serde", serde(borrow))]
    MdxFlowExpression(CowStr<'a>),

    /// An MDX text expression (inline).
    /// Only parsed and emitted with [`Options::ENABLE_MDX`].
    /// ```mdx
    /// an {expression} here
    /// ```
    #[cfg(feature = "mdx")]
    #[cfg_attr(feature = "serde", serde(borrow))]
    MdxTextExpression(CowStr<'a>),

    /// An MDX ESM block (import/export at document level).
    /// Only parsed and emitted with [`Options::ENABLE_MDX`].
    /// ```mdx
    /// import {Chart} from './chart.js'
    /// export const meta = {}
    /// ```
    #[cfg(feature = "mdx")]
    #[cfg_attr(feature = "serde", serde(borrow))]
    MdxEsm(CowStr<'a>),
}
714
715impl<'a> Event<'a> {
716 pub fn into_static(self) -> Event<'static> {
717 match self {
718 Event::Start(t) => Event::Start(t.into_static()),
719 Event::End(e) => Event::End(e),
720 Event::Text(s) => Event::Text(s.into_static()),
721 Event::Code(s) => Event::Code(s.into_static()),
722 Event::InlineMath(s) => Event::InlineMath(s.into_static()),
723 Event::DisplayMath(s) => Event::DisplayMath(s.into_static()),
724 Event::Html(s) => Event::Html(s.into_static()),
725 Event::InlineHtml(s) => Event::InlineHtml(s.into_static()),
726 Event::FootnoteReference(s) => Event::FootnoteReference(s.into_static()),
727 Event::SoftBreak => Event::SoftBreak,
728 Event::HardBreak => Event::HardBreak,
729 Event::Rule => Event::Rule,
730 Event::TaskListMarker(b) => Event::TaskListMarker(b),
731 #[cfg(feature = "mdx")]
732 Event::MdxFlowExpression(s) => Event::MdxFlowExpression(s.into_static()),
733 #[cfg(feature = "mdx")]
734 Event::MdxTextExpression(s) => Event::MdxTextExpression(s.into_static()),
735 #[cfg(feature = "mdx")]
736 Event::MdxEsm(s) => Event::MdxEsm(s.into_static()),
737 }
738 }
739}
740
/// Table column text alignment.
///
/// [`Tag::Table`] carries one value per column.
#[derive(Copy, Clone, Debug, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum Alignment {
    /// Default text alignment.
    None,
    /// Left-aligned column.
    Left,
    /// Centered column.
    Center,
    /// Right-aligned column.
    Right,
}
751
bitflags::bitflags! {
    /// Option struct containing flags for enabling extra features
    /// that are not part of the CommonMark spec.
    #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
    pub struct Options: u32 {
        /// Tables; see [`Tag::Table`].
        const ENABLE_TABLES = 1 << 1;
        /// GitHub-compatible footnote syntax.
        ///
        /// Footnotes are referenced with the syntax `[^IDENT]`,
        /// and defined with an identifier followed by a colon at top level.
        ///
        /// ---
        ///
        /// ```markdown
        /// Footnote referenced [^1].
        ///
        /// [^1]: footnote defined
        /// ```
        ///
        /// Footnote referenced [^1].
        ///
        /// [^1]: footnote defined
        const ENABLE_FOOTNOTES = 1 << 2;
        /// Strikethrough; see [`Tag::Strikethrough`].
        const ENABLE_STRIKETHROUGH = 1 << 3;
        /// Task list markers; see [`Event::TaskListMarker`].
        const ENABLE_TASKLISTS = 1 << 4;
        /// Enables replacement of ASCII punctuation characters with
        /// Unicode ligatures and smart quotes.
        ///
        /// This includes replacing `--` with `–`, `---` with `—`, `...` with `…`,
        /// `"quote"` with `\u{201c}quote\u{201d}`, and `'quote'` with `\u{2018}quote\u{2019}`.
        ///
        /// Equivalent to enabling all of `ENABLE_SMART_QUOTES`,
        /// `ENABLE_SMART_DASHES`, and `ENABLE_SMART_ELLIPSES`.
        const ENABLE_SMART_PUNCTUATION = 1 << 5;
        /// Replace straight quotes (`"`, `'`) with curly/smart quotes.
        const ENABLE_SMART_QUOTES = 1 << 18;
        /// Replace `--` with en-dash and `---` with em-dash.
        const ENABLE_SMART_DASHES = 1 << 19;
        /// Replace `...` with ellipsis (`…`).
        const ENABLE_SMART_ELLIPSES = 1 << 20;
        /// Extension to allow headings to have ID and classes.
        ///
        /// `# text { #id .class1 .class2 myattr other_attr=myvalue }`
        /// is interpreted as a level 1 heading
        /// with the content `text`, ID `id`, classes `class1` and `class2` and
        /// custom attributes `myattr` (without value) and
        /// `other_attr` with value `myvalue`.
        /// Note that ID, classes, and custom attributes should be space-separated.
        const ENABLE_HEADING_ATTRIBUTES = 1 << 6;
        /// Metadata blocks in YAML style, i.e.:
        /// - starting with a `---` line
        /// - ending with a `---` or `...` line
        const ENABLE_YAML_STYLE_METADATA_BLOCKS = 1 << 7;
        /// Metadata blocks delimited by:
        /// - `+++` line at start
        /// - `+++` line at end
        const ENABLE_PLUSES_DELIMITED_METADATA_BLOCKS = 1 << 8;
        /// Emits `Event::InlineMath` and `Event::DisplayMath` for TeX formulas.
        /// Umbrella over [`ENABLE_MATH_SINGLE_DOLLAR`](Self::ENABLE_MATH_SINGLE_DOLLAR)
        /// and [`ENABLE_MATH_MULTI_DOLLAR`](Self::ENABLE_MATH_MULTI_DOLLAR).
        const ENABLE_MATH = 1 << 10;
        /// Single-dollar inline math (`$x$`).
        const ENABLE_MATH_SINGLE_DOLLAR = 1 << 22;
        /// Multi-dollar math: inline `$$x$$` and `$$` block fences.
        const ENABLE_MATH_MULTI_DOLLAR = 1 << 23;
        /// Misc GitHub Flavored Markdown features not supported in CommonMark.
        const ENABLE_GFM = 1 << 11;
        /// GitHub-style blockquote alerts (`[!NOTE]`, `[!TIP]`, `[!IMPORTANT]`, `[!WARNING]`, `[!CAUTION]`).
        /// Not part of the GFM spec — this is a GitHub-specific feature.
        const ENABLE_GITHUB_ALERTS = 1 << 21;
        /// Commonmark-HS-Extensions compatible definition lists.
        ///
        /// ```markdown
        /// title 1
        ///   : definition 1
        ///
        /// title 2
        ///   : definition 2a
        ///   : definition 2b
        /// ```
        const ENABLE_DEFINITION_LIST = 1 << 12;
        /// Superscript (`^superscript^`); see [`Tag::Superscript`].
        const ENABLE_SUPERSCRIPT = 1 << 13;
        /// Subscript (`~subscript~`); see [`Tag::Subscript`].
        const ENABLE_SUBSCRIPT = 1 << 14;
        /// Obsidian-style Wikilinks.
        const ENABLE_WIKILINKS = 1 << 15;
        /// Directives: container (`:::`), leaf (`::`), and text (`:`) directives.
        const ENABLE_DIRECTIVE = 1 << 16;
        /// MDX: enables JSX elements, expressions, and ESM import/export.
        const ENABLE_MDX = 1 << 17;
    }
}
843
844impl Options {
845 pub(crate) fn has_smart_quotes(&self) -> bool {
846 self.contains(Options::ENABLE_SMART_PUNCTUATION)
847 || self.contains(Options::ENABLE_SMART_QUOTES)
848 }
849
850 pub(crate) fn has_smart_dashes(&self) -> bool {
851 self.contains(Options::ENABLE_SMART_PUNCTUATION)
852 || self.contains(Options::ENABLE_SMART_DASHES)
853 }
854
855 pub(crate) fn has_smart_ellipses(&self) -> bool {
856 self.contains(Options::ENABLE_SMART_PUNCTUATION)
857 || self.contains(Options::ENABLE_SMART_ELLIPSES)
858 }
859
860 pub(crate) fn has_math(&self) -> bool {
861 self.intersects(
862 Options::ENABLE_MATH
863 | Options::ENABLE_MATH_SINGLE_DOLLAR
864 | Options::ENABLE_MATH_MULTI_DOLLAR,
865 )
866 }
867}