satteri_pulldown_cmark/lib.rs
1// Copyright 2015 Google Inc. All rights reserved.
2//
3// Permission is hereby granted, free of charge, to any person obtaining a copy
4// of this software and associated documentation files (the "Software"), to deal
5// in the Software without restriction, including without limitation the rights
6// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7// copies of the Software, and to permit persons to whom the Software is
8// furnished to do so, subject to the following conditions:
9//
10// The above copyright notice and this permission notice shall be included in
11// all copies or substantial portions of the Software.
12//
13// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19// THE SOFTWARE.
20
21//! Pull parser for [CommonMark](https://commonmark.org). This crate provides a [Parser](struct.Parser.html) struct
22//! which is an iterator over [Event](enum.Event.html)s. This iterator can be used
23//! directly, or to build an arena representation via [`parse()`].
24//!
25//! By default, only CommonMark features are enabled. To use extensions like tables,
26//! footnotes or task lists, enable them by setting the corresponding flags in the
27//! [Options](struct.Options.html) struct.
28//!
29//! # Example
30//! ```rust
31//! use satteri_pulldown_cmark::{parse, Options};
32//!
33//! let markdown_input = "Hello world, this is a ~~complicated~~ *very simple* example.";
34//!
35//! let mut options = Options::empty();
36//! options.insert(Options::ENABLE_STRIKETHROUGH);
37//! let (arena, _) = parse(markdown_input, options);
38//! let html = satteri_ast::mdast_to_html(&arena);
39//!
40//! let expected_html = "<p>Hello world, this is a <del>complicated</del> <em>very simple</em> example.</p>\n";
41//! assert_eq!(expected_html, &html);
42//! ```
43//!
44//! Note that consecutive text events can happen due to the manner in which the
45//! parser evaluates the source. A utility `TextMergeStream` exists to improve
46//! the comfort of iterating the events:
47//!
48//! ```rust
49//! use satteri_pulldown_cmark::{Event, Parser, TextMergeStream};
50//!
51//! let markdown_input = "Hello world, this is a ~~complicated~~ *very simple* example.";
52//!
53//! let iterator = TextMergeStream::new(Parser::new(markdown_input));
54//!
55//! for event in iterator {
56//! match event {
57//! Event::Text(text) => println!("{}", text),
58//! _ => {}
59//! }
60//! }
61//! ```
62//!
63#![warn(
64 clippy::alloc_instead_of_core,
65 clippy::std_instead_of_alloc,
66 clippy::std_instead_of_core
67)]
68// Forbid unsafe code unless the SIMD feature is enabled.
69#![cfg_attr(not(feature = "simd"), forbid(unsafe_code))]
70#![warn(missing_debug_implementations)]
71#![cfg_attr(not(feature = "std"), no_std)]
72
73#[macro_use]
74extern crate alloc;
75
76#[cfg(feature = "std")]
77extern crate std;
78
79#[cfg(not(feature = "std"))]
80compile_error!("This crate requires the \"std\" feature.");
81
82use alloc::vec::Vec;
83
84#[cfg(feature = "serde")]
85use serde::{Deserialize, Serialize};
86
87pub mod utils;
88
89pub mod arena_build;
90mod entities;
91mod firstpass;
92mod linklabel;
93mod mdx;
94mod parse;
95pub(crate) mod post_passes;
96mod puncttable;
97mod scanners;
98mod strings;
99mod tree;
100
101use core::fmt::Display;
102
103pub use crate::{
104 arena_build::{parse, DEFAULT_OPTIONS, MDX_OPTIONS},
105 parse::{
106 BrokenLink, BrokenLinkCallback, DefaultParserCallbacks, OffsetIter, Parser,
107 ParserCallbacks, RefDefs,
108 },
109 strings::{CowStr, InlineStr},
110 utils::*,
111};
112
/// Codeblock kind.
///
/// Distinguishes indented code blocks from fenced ones; only fenced blocks
/// carry a payload.
#[derive(Clone, Debug, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum CodeBlockKind<'a> {
    /// An indented code block. This variant carries no payload.
    Indented,
    /// The value contained in the tag describes the language of the code, which may be empty.
    #[cfg_attr(feature = "serde", serde(borrow))]
    Fenced(CowStr<'a>),
}
122
123impl<'a> CodeBlockKind<'a> {
124 pub fn is_indented(&self) -> bool {
125 matches!(*self, CodeBlockKind::Indented)
126 }
127
128 pub fn is_fenced(&self) -> bool {
129 matches!(*self, CodeBlockKind::Fenced(_))
130 }
131
132 pub fn into_static(self) -> CodeBlockKind<'static> {
133 match self {
134 CodeBlockKind::Indented => CodeBlockKind::Indented,
135 CodeBlockKind::Fenced(s) => CodeBlockKind::Fenced(s.into_static()),
136 }
137 }
138}
139
/// BlockQuote kind (Note, Tip, Important, Warning, Caution).
///
/// Each variant corresponds to one of the alert markers listed on
/// [`Options::ENABLE_GITHUB_ALERTS`].
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum BlockQuoteKind {
    /// `[!NOTE]`
    Note,
    /// `[!TIP]`
    Tip,
    /// `[!IMPORTANT]`
    Important,
    /// `[!WARNING]`
    Warning,
    /// `[!CAUTION]`
    Caution,
}
150
/// Directive kind.
///
/// See [`Options::ENABLE_DIRECTIVE`] for the syntax of each kind.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum DirectiveKind {
    /// A container directive (`:::`).
    Container,
    /// A leaf directive (`::`).
    Leaf,
    /// A text directive (`:`).
    Text,
}
159
/// Metadata block kind.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum MetadataBlockKind {
    /// A YAML-style block, see [`Options::ENABLE_YAML_STYLE_METADATA_BLOCKS`].
    YamlStyle,
    /// A block delimited by `+++` lines, see
    /// [`Options::ENABLE_PLUSES_DELIMITED_METADATA_BLOCKS`].
    PlusesStyle,
}
167
/// Tags for elements that can contain other elements.
///
/// A `Tag` is carried by [`Event::Start`]; the matching [`Event::End`] carries
/// the corresponding [`TagEnd`] (see [`Tag::to_end`]).
#[derive(Clone, Debug, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum Tag<'a> {
    /// A paragraph of text and other inline elements.
    Paragraph,

    /// A heading, with optional identifier, classes and custom attributes.
    /// The identifier is prefixed with `#` and the last one in the attributes
    /// list is chosen, classes are prefixed with `.` and custom attributes
    /// have no prefix and can optionally have a value (`myattr` or `myattr=myvalue`).
    ///
    /// `id`, `classes` and `attrs` are only parsed and populated with [`Options::ENABLE_HEADING_ATTRIBUTES`], `None` or empty otherwise.
    Heading {
        /// The heading level, `H1` through `H6`.
        level: HeadingLevel,
        /// The heading identifier, if any.
        id: Option<CowStr<'a>>,
        /// The heading classes.
        classes: Vec<CowStr<'a>>,
        /// The first item of the tuple is the attr and second one the value.
        attrs: Vec<(CowStr<'a>, Option<CowStr<'a>>)>,
    },

    // NOTE(review): the doc below names `ENABLE_GFM`, but `Options` also declares a dedicated
    // `ENABLE_GITHUB_ALERTS` flag for blockquote alerts; confirm which flag(s) the parser checks.
    /// A block quote.
    ///
    /// The `BlockQuoteKind` is only parsed & populated with [`Options::ENABLE_GFM`], `None` otherwise.
    ///
    /// ```markdown
    /// > regular quote
    ///
    /// > [!NOTE]
    /// > note quote
    /// ```
    BlockQuote(Option<BlockQuoteKind>),
    /// A code block.
    CodeBlock(CodeBlockKind<'a>),
    /// A directive (container, leaf, or text).
    /// Only parsed and emitted with [`Options::ENABLE_DIRECTIVE`].
    Directive {
        /// Whether this is a container, leaf or text directive.
        kind: DirectiveKind,
        /// The directive name.
        name: CowStr<'a>,
        /// The directive attributes as pairs; presumably `(name, value)` (confirm against the parser).
        attributes: Vec<(CowStr<'a>, CowStr<'a>)>,
    },

    /// An HTML block.
    ///
    /// A line that begins with some predefined tags (HTML block tags) (see [CommonMark Spec](https://spec.commonmark.org/0.31.2/#html-blocks) for more details) or any tag that is followed only by whitespace.
    ///
    /// Most HTML blocks end on an empty line, though some e.g. `<pre>` like `<script>` or `<!-- Comments -->` don't.
    /// ```markdown
    /// <body> Is HTML block even though here is non-whitespace.
    /// Block ends on an empty line.
    ///
    /// <some-random-tag>
    /// This is HTML block.
    ///
    /// <pre> Doesn't end on empty lines.
    ///
    /// This is still the same block.</pre>
    /// ```
    HtmlBlock,

    /// A list. If the list is ordered the first field indicates the number of the first item.
    /// The second field is `true` when the list is tight (no blank lines between items).
    /// Contains only list items.
    List(Option<u64>, bool),
    /// A list item.
    Item,
    /// A footnote definition. The value contained is the footnote's label by which it can
    /// be referred to.
    ///
    /// Only parsed and emitted with [`Options::ENABLE_FOOTNOTES`].
    #[cfg_attr(feature = "serde", serde(borrow))]
    FootnoteDefinition(CowStr<'a>),

    /// A definition list.
    /// Only parsed and emitted with [`Options::ENABLE_DEFINITION_LIST`].
    DefinitionList,
    /// A title (term) inside a definition list.
    /// Only parsed and emitted with [`Options::ENABLE_DEFINITION_LIST`].
    DefinitionListTitle,
    /// A definition inside a definition list.
    /// Only parsed and emitted with [`Options::ENABLE_DEFINITION_LIST`].
    DefinitionListDefinition,

    /// A table. Contains a vector describing the text-alignment for each of its columns.
    /// Only parsed and emitted with [`Options::ENABLE_TABLES`].
    Table(Vec<Alignment>),
    /// A table header. Contains only `TableCell`s. Note that the table body starts immediately
    /// after the closure of the `TableHead` tag. There is no `TableBody` tag.
    /// Only parsed and emitted with [`Options::ENABLE_TABLES`].
    TableHead,
    /// A table row. Is used both for header rows as body rows. Contains only `TableCell`s.
    /// Only parsed and emitted with [`Options::ENABLE_TABLES`].
    TableRow,
    /// A table cell.
    /// Only parsed and emitted with [`Options::ENABLE_TABLES`].
    TableCell,

    // span-level tags
    /// [Emphasis](https://spec.commonmark.org/0.31.2/#emphasis-and-strong-emphasis).
    /// ```markdown
    /// half*emph* _strong_ _multi _level__
    /// ```
    Emphasis,
    /// [Strong emphasis](https://spec.commonmark.org/0.31.2/#emphasis-and-strong-emphasis).
    /// ```markdown
    /// half**strong** __strong__ __multi __level____
    /// ```
    Strong,
    /// Only parsed and emitted with [`Options::ENABLE_STRIKETHROUGH`].
    ///
    /// ```markdown
    /// ~strike through~
    /// ```
    Strikethrough,
    /// Only parsed and emitted with [`Options::ENABLE_SUPERSCRIPT`].
    ///
    /// ```markdown
    /// ^superscript^
    /// ```
    Superscript,
    /// Only parsed and emitted with [`Options::ENABLE_SUBSCRIPT`], if disabled `~something~` is parsed as [`Strikethrough`](Self::Strikethrough).
    /// ```markdown
    /// ~subscript~ ~~if also enabled this is strikethrough~~
    /// ```
    Subscript,

    /// A link.
    Link {
        /// How the link was written in the source, see [`LinkType`].
        link_type: LinkType,
        /// The destination URL.
        dest_url: CowStr<'a>,
        /// The link title.
        title: CowStr<'a>,
        /// Identifier of reference links, e.g. `world` in the link `[hello][world]`.
        id: CowStr<'a>,
    },

    /// An image. Carries the link type, the destination URL, the title and
    /// the link identifier.
    Image {
        /// How the image was written in the source, see [`LinkType`].
        link_type: LinkType,
        /// The destination URL.
        dest_url: CowStr<'a>,
        /// The image title.
        title: CowStr<'a>,
        /// Identifier of reference links, e.g. `world` in the link `[hello][world]`.
        id: CowStr<'a>,
    },

    /// A metadata block.
    /// Only parsed and emitted with [`Options::ENABLE_YAML_STYLE_METADATA_BLOCKS`]
    /// or [`Options::ENABLE_PLUSES_DELIMITED_METADATA_BLOCKS`].
    MetadataBlock(MetadataBlockKind),

    /// An MDX JSX element (flow-level, i.e. block).
    /// Only parsed and emitted with [`Options::ENABLE_MDX`].
    /// The `CowStr` is the raw JSX tag content (e.g. `Component x={1}`).
    /// ```mdx
    /// <Component x={1}>
    ///   children
    /// </Component>
    /// ```
    #[cfg_attr(feature = "serde", serde(borrow))]
    MdxJsxFlowElement(CowStr<'a>),

    /// An MDX JSX element (text-level, i.e. inline).
    /// Only parsed and emitted with [`Options::ENABLE_MDX`].
    #[cfg_attr(feature = "serde", serde(borrow))]
    MdxJsxTextElement(CowStr<'a>),
}
330
331impl<'a> Tag<'a> {
332 pub fn to_end(&self) -> TagEnd {
333 match self {
334 Tag::Paragraph => TagEnd::Paragraph,
335 Tag::Heading { level, .. } => TagEnd::Heading(*level),
336 Tag::BlockQuote(kind) => TagEnd::BlockQuote(*kind),
337 Tag::CodeBlock(_) => TagEnd::CodeBlock,
338 Tag::Directive { kind, .. } => TagEnd::Directive(*kind),
339 Tag::HtmlBlock => TagEnd::HtmlBlock,
340 Tag::List(number, _) => TagEnd::List(number.is_some()),
341 Tag::Item => TagEnd::Item,
342 Tag::FootnoteDefinition(_) => TagEnd::FootnoteDefinition,
343 Tag::Table(_) => TagEnd::Table,
344 Tag::TableHead => TagEnd::TableHead,
345 Tag::TableRow => TagEnd::TableRow,
346 Tag::TableCell => TagEnd::TableCell,
347 Tag::Subscript => TagEnd::Subscript,
348 Tag::Superscript => TagEnd::Superscript,
349 Tag::Emphasis => TagEnd::Emphasis,
350 Tag::Strong => TagEnd::Strong,
351 Tag::Strikethrough => TagEnd::Strikethrough,
352 Tag::Link { .. } => TagEnd::Link,
353 Tag::Image { .. } => TagEnd::Image,
354 Tag::MetadataBlock(kind) => TagEnd::MetadataBlock(*kind),
355 Tag::DefinitionList => TagEnd::DefinitionList,
356 Tag::DefinitionListTitle => TagEnd::DefinitionListTitle,
357 Tag::DefinitionListDefinition => TagEnd::DefinitionListDefinition,
358 Tag::MdxJsxFlowElement(_) => TagEnd::MdxJsxFlowElement,
359 Tag::MdxJsxTextElement(_) => TagEnd::MdxJsxTextElement,
360 }
361 }
362
363 pub fn into_static(self) -> Tag<'static> {
364 match self {
365 Tag::Paragraph => Tag::Paragraph,
366 Tag::Heading {
367 level,
368 id,
369 classes,
370 attrs,
371 } => Tag::Heading {
372 level,
373 id: id.map(|s| s.into_static()),
374 classes: classes.into_iter().map(|s| s.into_static()).collect(),
375 attrs: attrs
376 .into_iter()
377 .map(|(k, v)| (k.into_static(), v.map(|s| s.into_static())))
378 .collect(),
379 },
380 Tag::BlockQuote(k) => Tag::BlockQuote(k),
381 Tag::CodeBlock(kb) => Tag::CodeBlock(kb.into_static()),
382 Tag::Directive {
383 kind,
384 name,
385 attributes,
386 } => Tag::Directive {
387 kind,
388 name: name.into_static(),
389 attributes: attributes
390 .into_iter()
391 .map(|(k, v)| (k.into_static(), v.into_static()))
392 .collect(),
393 },
394 Tag::HtmlBlock => Tag::HtmlBlock,
395 Tag::List(v, t) => Tag::List(v, t),
396 Tag::Item => Tag::Item,
397 Tag::FootnoteDefinition(a) => Tag::FootnoteDefinition(a.into_static()),
398 Tag::Table(v) => Tag::Table(v),
399 Tag::TableHead => Tag::TableHead,
400 Tag::TableRow => Tag::TableRow,
401 Tag::TableCell => Tag::TableCell,
402 Tag::Emphasis => Tag::Emphasis,
403 Tag::Strong => Tag::Strong,
404 Tag::Strikethrough => Tag::Strikethrough,
405 Tag::Superscript => Tag::Superscript,
406 Tag::Subscript => Tag::Subscript,
407 Tag::Link {
408 link_type,
409 dest_url,
410 title,
411 id,
412 } => Tag::Link {
413 link_type,
414 dest_url: dest_url.into_static(),
415 title: title.into_static(),
416 id: id.into_static(),
417 },
418 Tag::Image {
419 link_type,
420 dest_url,
421 title,
422 id,
423 } => Tag::Image {
424 link_type,
425 dest_url: dest_url.into_static(),
426 title: title.into_static(),
427 id: id.into_static(),
428 },
429 Tag::MetadataBlock(v) => Tag::MetadataBlock(v),
430 Tag::DefinitionList => Tag::DefinitionList,
431 Tag::DefinitionListTitle => Tag::DefinitionListTitle,
432 Tag::DefinitionListDefinition => Tag::DefinitionListDefinition,
433 Tag::MdxJsxFlowElement(s) => Tag::MdxJsxFlowElement(s.into_static()),
434 Tag::MdxJsxTextElement(s) => Tag::MdxJsxTextElement(s.into_static()),
435 }
436 }
437}
438
/// The end of a `Tag`.
///
/// Carried by [`Event::End`]. Unlike [`Tag`] it is `Copy` and holds no
/// borrowed data; see [`Tag::to_end`] for the mapping from a `Tag`.
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum TagEnd {
    /// End of a [`Tag::Paragraph`].
    Paragraph,
    /// End of a [`Tag::Heading`], with the heading's level.
    Heading(HeadingLevel),

    /// End of a [`Tag::BlockQuote`], with the same kind as the start tag.
    BlockQuote(Option<BlockQuoteKind>),
    /// End of a [`Tag::CodeBlock`].
    CodeBlock,
    /// End of a [`Tag::Directive`], with the directive's kind.
    Directive(DirectiveKind),

    /// End of a [`Tag::HtmlBlock`].
    HtmlBlock,

    /// A list, `true` for ordered lists.
    List(bool),
    /// End of a [`Tag::Item`].
    Item,
    /// End of a [`Tag::FootnoteDefinition`].
    FootnoteDefinition,

    /// End of a [`Tag::DefinitionList`].
    DefinitionList,
    /// End of a [`Tag::DefinitionListTitle`].
    DefinitionListTitle,
    /// End of a [`Tag::DefinitionListDefinition`].
    DefinitionListDefinition,

    /// End of a [`Tag::Table`].
    Table,
    /// End of a [`Tag::TableHead`].
    TableHead,
    /// End of a [`Tag::TableRow`].
    TableRow,
    /// End of a [`Tag::TableCell`].
    TableCell,

    /// End of a [`Tag::Emphasis`].
    Emphasis,
    /// End of a [`Tag::Strong`].
    Strong,
    /// End of a [`Tag::Strikethrough`].
    Strikethrough,
    /// End of a [`Tag::Superscript`].
    Superscript,
    /// End of a [`Tag::Subscript`].
    Subscript,

    /// End of a [`Tag::Link`].
    Link,
    /// End of a [`Tag::Image`].
    Image,

    /// End of a [`Tag::MetadataBlock`], with the block's kind.
    MetadataBlock(MetadataBlockKind),

    /// End of a [`Tag::MdxJsxFlowElement`].
    MdxJsxFlowElement,
    /// End of a [`Tag::MdxJsxTextElement`].
    MdxJsxTextElement,
}
480
/// Compile-time check that `TagEnd` is exactly two bytes in size.
/// This is why it's used instead of just using `Tag`.
///
/// The array type `[(); 2]` only matches the initializer when
/// `size_of::<TagEnd>()` equals 2, so any other size fails to compile.
/// The check is only compiled on targets with 64-bit pointers.
#[cfg(target_pointer_width = "64")]
const _STATIC_ASSERT_TAG_END_SIZE: [(); 2] = [(); core::mem::size_of::<TagEnd>()];
485
486impl<'a> From<Tag<'a>> for TagEnd {
487 fn from(value: Tag) -> Self {
488 value.to_end()
489 }
490}
491
/// The level of a heading, from `H1` to `H6`.
///
/// The discriminants start at 1, so `HeadingLevel::H1 as usize == 1` and so on
/// up to `H6`.
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum HeadingLevel {
    H1 = 1,
    H2,
    H3,
    H4,
    H5,
    H6,
}

/// Formats the level as a lowercase tag name (`h1` to `h6`).
impl Display for HeadingLevel {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let tag_name = match *self {
            Self::H1 => "h1",
            Self::H2 => "h2",
            Self::H3 => "h3",
            Self::H4 => "h4",
            Self::H5 => "h5",
            Self::H6 => "h6",
        };
        f.write_str(tag_name)
    }
}

/// Returned when trying to convert a `usize` into a [`HeadingLevel`] but it
/// fails because the `usize` isn't a valid heading level (1 through 6).
///
/// The wrapped value is the rejected input.
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)]
pub struct InvalidHeadingLevel(usize);

impl TryFrom<usize> for HeadingLevel {
    type Error = InvalidHeadingLevel;

    /// Maps `1..=6` to `H1..=H6`; any other value is returned inside
    /// [`InvalidHeadingLevel`].
    fn try_from(value: usize) -> Result<Self, Self::Error> {
        let level = match value {
            1 => Self::H1,
            2 => Self::H2,
            3 => Self::H3,
            4 => Self::H4,
            5 => Self::H5,
            6 => Self::H6,
            _ => return Err(InvalidHeadingLevel(value)),
        };
        Ok(level)
    }
}
536
/// Type specifier for inline links. See [the Tag::Link](enum.Tag.html#variant.Link) for more information.
#[derive(Clone, Debug, PartialEq, Copy)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum LinkType {
    /// Inline link like `[foo](bar)`
    Inline,
    /// Reference link like `[foo][bar]`
    Reference,
    /// Reference without destination in the document, but resolved by the broken_link_callback
    ReferenceUnknown,
    /// Collapsed link like `[foo][]`
    Collapsed,
    /// Collapsed link without destination in the document, but resolved by the broken_link_callback
    CollapsedUnknown,
    /// Shortcut link like `[foo]`
    Shortcut,
    /// Shortcut without destination in the document, but resolved by the broken_link_callback
    ShortcutUnknown,
    /// Autolink like `<http://foo.bar/baz>`
    Autolink,
    /// Email address in autolink like `<john@example.org>`
    Email,
    /// Wikilink link like `[[foo]]` or `[[foo|bar]]`
    WikiLink {
        /// `true` if the wikilink was piped.
        ///
        /// * `true` - `[[foo|bar]]`
        /// * `false` - `[[foo]]`
        has_pothole: bool,
    },
}

impl LinkType {
    /// Returns the `*Unknown` counterpart of a reference-style link type.
    ///
    /// Only `Reference`, `Collapsed` and `Shortcut` have such a counterpart;
    /// calling this on any other variant hits `unreachable!()` and panics.
    fn to_unknown(self) -> Self {
        match self {
            Self::Reference => Self::ReferenceUnknown,
            Self::Collapsed => Self::CollapsedUnknown,
            Self::Shortcut => Self::ShortcutUnknown,
            _ => unreachable!(),
        }
    }
}
580
/// Markdown events that are generated in a preorder traversal of the document
/// tree, with additional `End` events whenever all of an inner node's children
/// have been visited.
#[derive(Clone, Debug, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum Event<'a> {
    /// Start of a tagged element. Events that are yielded after this event
    /// and before its corresponding `End` event are inside this element.
    /// Start and end events are guaranteed to be balanced.
    #[cfg_attr(feature = "serde", serde(borrow))]
    Start(Tag<'a>),
    /// End of a tagged element.
    End(TagEnd),
    /// A text node.
    ///
    /// All text, outside and inside [`Tag`]s.
    #[cfg_attr(feature = "serde", serde(borrow))]
    Text(CowStr<'a>),
    /// An [inline code node](https://spec.commonmark.org/0.31.2/#code-spans).
    ///
    /// ```markdown
    /// `code`
    /// ```
    #[cfg_attr(feature = "serde", serde(borrow))]
    Code(CowStr<'a>),
    /// An inline math environment node.
    /// Requires [`Options::ENABLE_MATH`].
    ///
    /// ```markdown
    /// $math$
    /// ```
    #[cfg_attr(feature = "serde", serde(borrow))]
    InlineMath(CowStr<'a>),
    /// A display math environment node.
    /// Requires [`Options::ENABLE_MATH`].
    ///
    /// ```markdown
    /// $$math$$
    /// ```
    #[cfg_attr(feature = "serde", serde(borrow))]
    DisplayMath(CowStr<'a>),
    /// An HTML node.
    ///
    /// A line of HTML inside [`Tag::HtmlBlock`] includes the line break.
    #[cfg_attr(feature = "serde", serde(borrow))]
    Html(CowStr<'a>),
    /// An [inline HTML node](https://spec.commonmark.org/0.31.2/#raw-html).
    ///
    /// Contains only the tag itself, e.g. `<open-tag>`, `</close-tag>` or `<!-- comment -->`.
    ///
    /// **Note**: Under some conditions HTML can also be parsed as an HTML Block, see [`Tag::HtmlBlock`] for details.
    #[cfg_attr(feature = "serde", serde(borrow))]
    InlineHtml(CowStr<'a>),
    /// A reference to a footnote with given label, defined
    /// by an event with a [`Tag::FootnoteDefinition`] tag. Definitions and references to them may
    /// occur in any order. Only parsed and emitted with [`Options::ENABLE_FOOTNOTES`].
    ///
    /// ```markdown
    /// [^1]
    /// ```
    #[cfg_attr(feature = "serde", serde(borrow))]
    FootnoteReference(CowStr<'a>),
    /// A [soft line break](https://spec.commonmark.org/0.31.2/#soft-line-breaks).
    ///
    /// Any line break that isn't a [`HardBreak`](Self::HardBreak), or the end of e.g. a paragraph.
    SoftBreak,
    /// A [hard line break](https://spec.commonmark.org/0.31.2/#hard-line-breaks).
    ///
    /// A line ending that is either preceded by at least two spaces or `\`.
    ///
    /// ```markdown
    /// hard··
    /// line\
    /// breaks
    /// ```
    /// *`·` is a space*
    HardBreak,
    /// A horizontal ruler.
    ///
    /// ```markdown
    /// ***
    /// ···---
    /// _·_··_····_··
    /// ```
    /// *`·` is any whitespace*
    Rule,
    /// A task list marker, rendered as a checkbox in HTML. Contains a true when it is checked.
    /// Only parsed and emitted with [`Options::ENABLE_TASKLISTS`].
    /// ```markdown
    /// - [ ] unchecked
    /// - [x] checked
    /// ```
    TaskListMarker(bool),

    /// An MDX flow expression (block-level).
    /// Only parsed and emitted with [`Options::ENABLE_MDX`].
    /// ```mdx
    /// {1 + 1}
    /// ```
    #[cfg_attr(feature = "serde", serde(borrow))]
    MdxFlowExpression(CowStr<'a>),

    /// An MDX text expression (inline).
    /// Only parsed and emitted with [`Options::ENABLE_MDX`].
    /// ```mdx
    /// an {expression} here
    /// ```
    #[cfg_attr(feature = "serde", serde(borrow))]
    MdxTextExpression(CowStr<'a>),

    /// An MDX ESM block (import/export at document level).
    /// Only parsed and emitted with [`Options::ENABLE_MDX`].
    /// ```mdx
    /// import {Chart} from './chart.js'
    /// export const meta = {}
    /// ```
    #[cfg_attr(feature = "serde", serde(borrow))]
    MdxEsm(CowStr<'a>),
}
700
701impl<'a> Event<'a> {
702 pub fn into_static(self) -> Event<'static> {
703 match self {
704 Event::Start(t) => Event::Start(t.into_static()),
705 Event::End(e) => Event::End(e),
706 Event::Text(s) => Event::Text(s.into_static()),
707 Event::Code(s) => Event::Code(s.into_static()),
708 Event::InlineMath(s) => Event::InlineMath(s.into_static()),
709 Event::DisplayMath(s) => Event::DisplayMath(s.into_static()),
710 Event::Html(s) => Event::Html(s.into_static()),
711 Event::InlineHtml(s) => Event::InlineHtml(s.into_static()),
712 Event::FootnoteReference(s) => Event::FootnoteReference(s.into_static()),
713 Event::SoftBreak => Event::SoftBreak,
714 Event::HardBreak => Event::HardBreak,
715 Event::Rule => Event::Rule,
716 Event::TaskListMarker(b) => Event::TaskListMarker(b),
717 Event::MdxFlowExpression(s) => Event::MdxFlowExpression(s.into_static()),
718 Event::MdxTextExpression(s) => Event::MdxTextExpression(s.into_static()),
719 Event::MdxEsm(s) => Event::MdxEsm(s.into_static()),
720 }
721 }
722}
723
/// Table column text alignment.
///
/// One value per column is stored in [`Tag::Table`].
#[derive(Copy, Clone, Debug, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum Alignment {
    /// Default text alignment.
    None,
    /// Left-aligned text.
    Left,
    /// Centered text.
    Center,
    /// Right-aligned text.
    Right,
}
734
bitflags::bitflags! {
    /// Option struct containing flags for enabling extra features
    /// that are not part of the CommonMark spec.
    #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
    pub struct Options: u32 {
        /// Enables tables: [`Tag::Table`], [`Tag::TableHead`], [`Tag::TableRow`]
        /// and [`Tag::TableCell`].
        const ENABLE_TABLES = 1 << 1;
        /// GitHub-compatible footnote syntax.
        ///
        /// Footnotes are referenced with the syntax `[^IDENT]`,
        /// and defined with an identifier followed by a colon at top level.
        ///
        /// ---
        ///
        /// ```markdown
        /// Footnote referenced [^1].
        ///
        /// [^1]: footnote defined
        /// ```
        ///
        /// Footnote referenced [^1].
        ///
        /// [^1]: footnote defined
        const ENABLE_FOOTNOTES = 1 << 2;
        /// Enables [`Tag::Strikethrough`].
        const ENABLE_STRIKETHROUGH = 1 << 3;
        /// Enables [`Event::TaskListMarker`].
        const ENABLE_TASKLISTS = 1 << 4;
        /// Enables replacement of ASCII punctuation characters with
        /// Unicode ligatures and smart quotes.
        ///
        /// This includes replacing `--` with `–`, `---` with `—`, `...` with `…`,
        /// `"quote"` with `\u{201c}quote\u{201d}`, and `'quote'` with `\u{2018}quote\u{2019}`.
        ///
        /// Equivalent to enabling all of `ENABLE_SMART_QUOTES`,
        /// `ENABLE_SMART_DASHES`, and `ENABLE_SMART_ELLIPSES`.
        const ENABLE_SMART_PUNCTUATION = 1 << 5;
        /// Replace straight quotes (`"`, `'`) with curly/smart quotes.
        const ENABLE_SMART_QUOTES = 1 << 18;
        /// Replace `--` with en-dash and `---` with em-dash.
        const ENABLE_SMART_DASHES = 1 << 19;
        /// Replace `...` with ellipsis (`…`).
        const ENABLE_SMART_ELLIPSES = 1 << 20;
        /// Extension to allow headings to have ID and classes.
        ///
        /// `# text { #id .class1 .class2 myattr other_attr=myvalue }`
        /// is interpreted as a level 1 heading
        /// with the content `text`, ID `id`, classes `class1` and `class2` and
        /// custom attributes `myattr` (without value) and
        /// `other_attr` with value `myvalue`.
        /// Note that ID, classes, and custom attributes should be space-separated.
        const ENABLE_HEADING_ATTRIBUTES = 1 << 6;
        /// Metadata blocks in YAML style, i.e.:
        /// - starting with a `---` line
        /// - ending with a `---` or `...` line
        const ENABLE_YAML_STYLE_METADATA_BLOCKS = 1 << 7;
        /// Metadata blocks delimited by:
        /// - `+++` line at start
        /// - `+++` line at end
        const ENABLE_PLUSES_DELIMITED_METADATA_BLOCKS = 1 << 8;
        /// With this feature enabled, two events `Event::InlineMath` and `Event::DisplayMath`
        /// are emitted that conventionally contain TeX formulas.
        const ENABLE_MATH = 1 << 10;
        /// Misc GitHub Flavored Markdown features not supported in CommonMark.
        const ENABLE_GFM = 1 << 11;
        /// GitHub-style blockquote alerts ([!NOTE], [!TIP], [!IMPORTANT], [!WARNING], [!CAUTION]).
        /// Not part of the GFM spec — this is a GitHub-specific feature.
        const ENABLE_GITHUB_ALERTS = 1 << 21;
        /// Commonmark-HS-Extensions compatible definition lists.
        ///
        /// ```markdown
        /// title 1
        ///   : definition 1
        ///
        /// title 2
        ///   : definition 2a
        ///   : definition 2b
        /// ```
        const ENABLE_DEFINITION_LIST = 1 << 12;
        /// Enables [`Tag::Superscript`] (`^superscript^`).
        const ENABLE_SUPERSCRIPT = 1 << 13;
        /// Enables [`Tag::Subscript`] (`~subscript~`).
        const ENABLE_SUBSCRIPT = 1 << 14;
        /// Obsidian-style Wikilinks.
        const ENABLE_WIKILINKS = 1 << 15;
        /// Directives: container (:::), leaf (::), and text (:) directives.
        const ENABLE_DIRECTIVE = 1 << 16;
        /// MDX: enables JSX elements, expressions, and ESM import/export.
        const ENABLE_MDX = 1 << 17;
    }
}
821
822impl Options {
823 pub(crate) fn has_smart_quotes(&self) -> bool {
824 self.contains(Options::ENABLE_SMART_PUNCTUATION)
825 || self.contains(Options::ENABLE_SMART_QUOTES)
826 }
827
828 pub(crate) fn has_smart_dashes(&self) -> bool {
829 self.contains(Options::ENABLE_SMART_PUNCTUATION)
830 || self.contains(Options::ENABLE_SMART_DASHES)
831 }
832
833 pub(crate) fn has_smart_ellipses(&self) -> bool {
834 self.contains(Options::ENABLE_SMART_PUNCTUATION)
835 || self.contains(Options::ENABLE_SMART_ELLIPSES)
836 }
837}