satteri_pulldown_cmark/lib.rs
1// Copyright 2015 Google Inc. All rights reserved.
2//
3// Permission is hereby granted, free of charge, to any person obtaining a copy
4// of this software and associated documentation files (the "Software"), to deal
5// in the Software without restriction, including without limitation the rights
6// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7// copies of the Software, and to permit persons to whom the Software is
8// furnished to do so, subject to the following conditions:
9//
10// The above copyright notice and this permission notice shall be included in
11// all copies or substantial portions of the Software.
12//
13// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19// THE SOFTWARE.
20
21//! Pull parser for [CommonMark](https://commonmark.org). This crate provides a [Parser](struct.Parser.html) struct
22//! which is an iterator over [Event](enum.Event.html)s. This iterator can be used
23//! directly, or to build an arena representation via [`parse()`].
24//!
25//! By default, only CommonMark features are enabled. To use extensions like tables,
26//! footnotes or task lists, enable them by setting the corresponding flags in the
27//! [Options](struct.Options.html) struct.
28//!
29//! # Example
30//! ```rust
31//! use satteri_pulldown_cmark::{parse, Options};
32//!
33//! let markdown_input = "Hello world, this is a ~~complicated~~ *very simple* example.";
34//!
35//! let mut options = Options::empty();
36//! options.insert(Options::ENABLE_STRIKETHROUGH);
37//! let (arena, _) = parse(markdown_input, options);
38//! let html = satteri_ast::mdast_to_html(&arena);
39//!
40//! let expected_html = "<p>Hello world, this is a <del>complicated</del> <em>very simple</em> example.</p>\n";
41//! assert_eq!(expected_html, &html);
42//! ```
43//!
44//! Note that consecutive text events can happen due to the manner in which the
45//! parser evaluates the source. A utility `TextMergeStream` exists to improve
46//! the comfort of iterating the events:
47//!
48//! ```rust
49//! use satteri_pulldown_cmark::{Event, Parser, TextMergeStream};
50//!
51//! let markdown_input = "Hello world, this is a ~~complicated~~ *very simple* example.";
52//!
53//! let iterator = TextMergeStream::new(Parser::new(markdown_input));
54//!
55//! for event in iterator {
56//! match event {
57//! Event::Text(text) => println!("{}", text),
58//! _ => {}
59//! }
60//! }
61//! ```
62//!
63#![warn(
64 clippy::alloc_instead_of_core,
65 clippy::std_instead_of_alloc,
66 clippy::std_instead_of_core
67)]
68// Forbid unsafe code unless the SIMD feature is enabled.
69#![cfg_attr(not(feature = "simd"), forbid(unsafe_code))]
70#![warn(missing_debug_implementations)]
71#![cfg_attr(not(feature = "std"), no_std)]
72
73#[macro_use]
74extern crate alloc;
75
76#[cfg(feature = "std")]
77extern crate std;
78
79#[cfg(not(feature = "std"))]
80compile_error!("This crate requires the \"std\" feature.");
81
82use alloc::vec::Vec;
83
84#[cfg(feature = "serde")]
85use serde::{Deserialize, Serialize};
86
87pub mod utils;
88
89pub mod arena_build;
90mod entities;
91mod firstpass;
92mod linklabel;
93mod mdx;
94mod parse;
95mod puncttable;
96mod scanners;
97mod strings;
98mod tree;
99
100use core::fmt::Display;
101
102pub use crate::{
103 arena_build::{parse, DEFAULT_OPTIONS, MDX_OPTIONS},
104 parse::{
105 BrokenLink, BrokenLinkCallback, DefaultParserCallbacks, OffsetIter, Parser,
106 ParserCallbacks, RefDefs,
107 },
108 strings::{CowStr, InlineStr},
109 utils::*,
110};
111
/// Codeblock kind.
///
/// Distinguishes the two code block variants carried by [`Tag::CodeBlock`].
#[derive(Clone, Debug, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum CodeBlockKind<'a> {
    /// An indented code block. This variant carries no payload.
    Indented,
    /// The value contained in the tag describes the language of the code, which may be empty.
    #[cfg_attr(feature = "serde", serde(borrow))]
    Fenced(CowStr<'a>),
}
121
122impl<'a> CodeBlockKind<'a> {
123 pub fn is_indented(&self) -> bool {
124 matches!(*self, CodeBlockKind::Indented)
125 }
126
127 pub fn is_fenced(&self) -> bool {
128 matches!(*self, CodeBlockKind::Fenced(_))
129 }
130
131 pub fn into_static(self) -> CodeBlockKind<'static> {
132 match self {
133 CodeBlockKind::Indented => CodeBlockKind::Indented,
134 CodeBlockKind::Fenced(s) => CodeBlockKind::Fenced(s.into_static()),
135 }
136 }
137}
138
/// BlockQuote kind (Note, Tip, Important, Warning, Caution).
///
/// Carried as an `Option` by [`Tag::BlockQuote`] and [`TagEnd::BlockQuote`];
/// a regular quote has no kind.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum BlockQuoteKind {
    /// A `[!NOTE]` quote.
    Note,
    /// A `[!TIP]` quote.
    Tip,
    /// A `[!IMPORTANT]` quote.
    Important,
    /// A `[!WARNING]` quote.
    Warning,
    /// A `[!CAUTION]` quote.
    Caution,
}
149
/// Directive kind.
///
/// Carried by [`Tag::Directive`] and [`TagEnd::Directive`].
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum DirectiveKind {
    /// A container directive (`:::`).
    Container,
    /// A leaf directive (`::`).
    Leaf,
    /// A text directive (`:`).
    Text,
}
158
/// Metadata block kind.
///
/// Carried by [`Tag::MetadataBlock`] and [`TagEnd::MetadataBlock`].
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum MetadataBlockKind {
    /// YAML-style block: starts with a `---` line and ends with a `---` or `...` line
    /// (see [`Options::ENABLE_YAML_STYLE_METADATA_BLOCKS`]).
    YamlStyle,
    /// Block delimited by `+++` lines at start and end
    /// (see [`Options::ENABLE_PLUSES_DELIMITED_METADATA_BLOCKS`]).
    PlusesStyle,
}
166
/// Tags for elements that can contain other elements.
///
/// A `Tag` is carried by [`Event::Start`]; the matching [`Event::End`] carries
/// the [`TagEnd`] obtained from [`Tag::to_end`].
#[derive(Clone, Debug, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum Tag<'a> {
    /// A paragraph of text and other inline elements.
    Paragraph,

    /// A heading, with optional identifier, classes and custom attributes.
    /// The identifier is prefixed with `#` and the last one in the attributes
    /// list is chosen, classes are prefixed with `.` and custom attributes
    /// have no prefix and can optionally have a value (`myattr` or `myattr=myvalue`).
    ///
    /// `id`, `classes` and `attrs` are only parsed and populated with [`Options::ENABLE_HEADING_ATTRIBUTES`], `None` or empty otherwise.
    Heading {
        /// The heading level (`H1` to `H6`).
        level: HeadingLevel,
        /// The heading identifier, if any.
        id: Option<CowStr<'a>>,
        /// The heading classes.
        classes: Vec<CowStr<'a>>,
        /// The first item of the tuple is the attr and second one the value.
        attrs: Vec<(CowStr<'a>, Option<CowStr<'a>>)>,
    },

    /// A block quote.
    ///
    /// The `BlockQuoteKind` is only parsed & populated with [`Options::ENABLE_GFM`], `None` otherwise.
    ///
    /// NOTE(review): `Options` also declares `ENABLE_GITHUB_ALERTS` for
    /// `[!NOTE]`-style alerts; confirm which flag actually populates the kind.
    ///
    /// ```markdown
    /// > regular quote
    ///
    /// > [!NOTE]
    /// > note quote
    /// ```
    BlockQuote(Option<BlockQuoteKind>),
    /// A code block.
    CodeBlock(CodeBlockKind<'a>),
    /// A directive (container, leaf, or text).
    /// Only parsed and emitted with [`Options::ENABLE_DIRECTIVE`].
    Directive {
        /// Whether this is a container, leaf or text directive.
        kind: DirectiveKind,
        /// The directive name.
        name: CowStr<'a>,
        /// The directive attributes as pairs; presumably `(name, value)`.
        /// TODO confirm against the directive parser.
        attributes: Vec<(CowStr<'a>, CowStr<'a>)>,
    },

    /// An HTML block.
    ///
    /// A line that begins with some predefined tags (HTML block tags) (see [CommonMark Spec](https://spec.commonmark.org/0.31.2/#html-blocks) for more details) or any tag that is followed only by whitespace.
    ///
    /// Most HTML blocks end on an empty line, though some, e.g. `<pre>`, `<script>` or `<!-- Comments -->`, don't.
    /// ```markdown
    /// <body> Is HTML block even though here is non-whitespace.
    /// Block ends on an empty line.
    ///
    /// <some-random-tag>
    /// This is HTML block.
    ///
    /// <pre> Doesn't end on empty lines.
    ///
    /// This is still the same block.</pre>
    /// ```
    HtmlBlock,

    /// A list. If the list is ordered the first field indicates the number of the first item.
    /// The second field is `true` when the list is tight (no blank lines between items).
    /// Contains only list items.
    List(Option<u64>, bool),
    /// A list item.
    Item,
    /// A footnote definition. The value contained is the footnote's label by which it can
    /// be referred to.
    ///
    /// Only parsed and emitted with [`Options::ENABLE_FOOTNOTES`].
    #[cfg_attr(feature = "serde", serde(borrow))]
    FootnoteDefinition(CowStr<'a>),

    /// A definition list.
    /// Only parsed and emitted with [`Options::ENABLE_DEFINITION_LIST`].
    DefinitionList,
    /// A title (term) inside a definition list.
    /// Only parsed and emitted with [`Options::ENABLE_DEFINITION_LIST`].
    DefinitionListTitle,
    /// A definition inside a definition list.
    /// Only parsed and emitted with [`Options::ENABLE_DEFINITION_LIST`].
    DefinitionListDefinition,

    /// A table. Contains a vector describing the text-alignment for each of its columns.
    /// Only parsed and emitted with [`Options::ENABLE_TABLES`].
    Table(Vec<Alignment>),
    /// A table header. Contains only `TableCell`s. Note that the table body starts immediately
    /// after the closure of the `TableHead` tag. There is no `TableBody` tag.
    /// Only parsed and emitted with [`Options::ENABLE_TABLES`].
    TableHead,
    /// A table row. Used for both header rows and body rows. Contains only `TableCell`s.
    /// Only parsed and emitted with [`Options::ENABLE_TABLES`].
    TableRow,
    /// A table cell.
    /// Only parsed and emitted with [`Options::ENABLE_TABLES`].
    TableCell,

    // span-level tags
    /// [Emphasis](https://spec.commonmark.org/0.31.2/#emphasis-and-strong-emphasis).
    /// ```markdown
    /// half*emph* _strong_ _multi _level__
    /// ```
    Emphasis,
    /// [Strong emphasis](https://spec.commonmark.org/0.31.2/#emphasis-and-strong-emphasis).
    /// ```markdown
    /// half**strong** __strong__ __multi __level____
    /// ```
    Strong,
    /// Only parsed and emitted with [`Options::ENABLE_STRIKETHROUGH`].
    ///
    /// ```markdown
    /// ~strike through~
    /// ```
    Strikethrough,
    /// Only parsed and emitted with [`Options::ENABLE_SUPERSCRIPT`].
    ///
    /// ```markdown
    /// ^superscript^
    /// ```
    Superscript,
    /// Only parsed and emitted with [`Options::ENABLE_SUBSCRIPT`], if disabled `~something~` is parsed as [`Strikethrough`](Self::Strikethrough).
    /// ```markdown
    /// ~subscript~ ~~if also enabled this is strikethrough~~
    /// ```
    Subscript,

    /// A link.
    Link {
        /// How the link was written; see [`LinkType`].
        link_type: LinkType,
        /// The destination URL.
        dest_url: CowStr<'a>,
        /// The link title.
        title: CowStr<'a>,
        /// Identifier of reference links, e.g. `world` in the link `[hello][world]`.
        id: CowStr<'a>,
    },

    /// An image. Carries the same fields as [`Tag::Link`]: the link type, the
    /// destination URL, the title and the link identifier.
    Image {
        /// How the image reference was written; see [`LinkType`].
        link_type: LinkType,
        /// The destination URL.
        dest_url: CowStr<'a>,
        /// The image title.
        title: CowStr<'a>,
        /// Identifier of reference links, e.g. `world` in the link `[hello][world]`.
        id: CowStr<'a>,
    },

    /// A metadata block.
    /// Only parsed and emitted with [`Options::ENABLE_YAML_STYLE_METADATA_BLOCKS`]
    /// or [`Options::ENABLE_PLUSES_DELIMITED_METADATA_BLOCKS`].
    MetadataBlock(MetadataBlockKind),

    /// An MDX JSX element (flow-level, i.e. block).
    /// Only parsed and emitted with [`Options::ENABLE_MDX`].
    /// The `CowStr` is the raw JSX tag content (e.g. `Component x={1}`).
    /// ```mdx
    /// <Component x={1}>
    ///   children
    /// </Component>
    /// ```
    #[cfg_attr(feature = "serde", serde(borrow))]
    MdxJsxFlowElement(CowStr<'a>),

    /// An MDX JSX element (text-level, i.e. inline).
    /// Only parsed and emitted with [`Options::ENABLE_MDX`].
    #[cfg_attr(feature = "serde", serde(borrow))]
    MdxJsxTextElement(CowStr<'a>),
}
329
330impl<'a> Tag<'a> {
331 pub fn to_end(&self) -> TagEnd {
332 match self {
333 Tag::Paragraph => TagEnd::Paragraph,
334 Tag::Heading { level, .. } => TagEnd::Heading(*level),
335 Tag::BlockQuote(kind) => TagEnd::BlockQuote(*kind),
336 Tag::CodeBlock(_) => TagEnd::CodeBlock,
337 Tag::Directive { kind, .. } => TagEnd::Directive(*kind),
338 Tag::HtmlBlock => TagEnd::HtmlBlock,
339 Tag::List(number, _) => TagEnd::List(number.is_some()),
340 Tag::Item => TagEnd::Item,
341 Tag::FootnoteDefinition(_) => TagEnd::FootnoteDefinition,
342 Tag::Table(_) => TagEnd::Table,
343 Tag::TableHead => TagEnd::TableHead,
344 Tag::TableRow => TagEnd::TableRow,
345 Tag::TableCell => TagEnd::TableCell,
346 Tag::Subscript => TagEnd::Subscript,
347 Tag::Superscript => TagEnd::Superscript,
348 Tag::Emphasis => TagEnd::Emphasis,
349 Tag::Strong => TagEnd::Strong,
350 Tag::Strikethrough => TagEnd::Strikethrough,
351 Tag::Link { .. } => TagEnd::Link,
352 Tag::Image { .. } => TagEnd::Image,
353 Tag::MetadataBlock(kind) => TagEnd::MetadataBlock(*kind),
354 Tag::DefinitionList => TagEnd::DefinitionList,
355 Tag::DefinitionListTitle => TagEnd::DefinitionListTitle,
356 Tag::DefinitionListDefinition => TagEnd::DefinitionListDefinition,
357 Tag::MdxJsxFlowElement(_) => TagEnd::MdxJsxFlowElement,
358 Tag::MdxJsxTextElement(_) => TagEnd::MdxJsxTextElement,
359 }
360 }
361
362 pub fn into_static(self) -> Tag<'static> {
363 match self {
364 Tag::Paragraph => Tag::Paragraph,
365 Tag::Heading {
366 level,
367 id,
368 classes,
369 attrs,
370 } => Tag::Heading {
371 level,
372 id: id.map(|s| s.into_static()),
373 classes: classes.into_iter().map(|s| s.into_static()).collect(),
374 attrs: attrs
375 .into_iter()
376 .map(|(k, v)| (k.into_static(), v.map(|s| s.into_static())))
377 .collect(),
378 },
379 Tag::BlockQuote(k) => Tag::BlockQuote(k),
380 Tag::CodeBlock(kb) => Tag::CodeBlock(kb.into_static()),
381 Tag::Directive {
382 kind,
383 name,
384 attributes,
385 } => Tag::Directive {
386 kind,
387 name: name.into_static(),
388 attributes: attributes
389 .into_iter()
390 .map(|(k, v)| (k.into_static(), v.into_static()))
391 .collect(),
392 },
393 Tag::HtmlBlock => Tag::HtmlBlock,
394 Tag::List(v, t) => Tag::List(v, t),
395 Tag::Item => Tag::Item,
396 Tag::FootnoteDefinition(a) => Tag::FootnoteDefinition(a.into_static()),
397 Tag::Table(v) => Tag::Table(v),
398 Tag::TableHead => Tag::TableHead,
399 Tag::TableRow => Tag::TableRow,
400 Tag::TableCell => Tag::TableCell,
401 Tag::Emphasis => Tag::Emphasis,
402 Tag::Strong => Tag::Strong,
403 Tag::Strikethrough => Tag::Strikethrough,
404 Tag::Superscript => Tag::Superscript,
405 Tag::Subscript => Tag::Subscript,
406 Tag::Link {
407 link_type,
408 dest_url,
409 title,
410 id,
411 } => Tag::Link {
412 link_type,
413 dest_url: dest_url.into_static(),
414 title: title.into_static(),
415 id: id.into_static(),
416 },
417 Tag::Image {
418 link_type,
419 dest_url,
420 title,
421 id,
422 } => Tag::Image {
423 link_type,
424 dest_url: dest_url.into_static(),
425 title: title.into_static(),
426 id: id.into_static(),
427 },
428 Tag::MetadataBlock(v) => Tag::MetadataBlock(v),
429 Tag::DefinitionList => Tag::DefinitionList,
430 Tag::DefinitionListTitle => Tag::DefinitionListTitle,
431 Tag::DefinitionListDefinition => Tag::DefinitionListDefinition,
432 Tag::MdxJsxFlowElement(s) => Tag::MdxJsxFlowElement(s.into_static()),
433 Tag::MdxJsxTextElement(s) => Tag::MdxJsxTextElement(s.into_static()),
434 }
435 }
436}
437
/// The end of a `Tag`.
///
/// Each variant mirrors the [`Tag`] variant of the same name but keeps only
/// `Copy` data, so the whole type is `Copy`. It is produced by [`Tag::to_end`]
/// and carried by [`Event::End`].
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum TagEnd {
    Paragraph,
    /// End of a heading of the given level.
    Heading(HeadingLevel),

    /// End of a block quote, with the same kind as the opening tag.
    BlockQuote(Option<BlockQuoteKind>),
    CodeBlock,
    /// End of a directive of the given kind.
    Directive(DirectiveKind),

    HtmlBlock,

    /// A list, `true` for ordered lists.
    List(bool),
    Item,
    FootnoteDefinition,

    DefinitionList,
    DefinitionListTitle,
    DefinitionListDefinition,

    Table,
    TableHead,
    TableRow,
    TableCell,

    Emphasis,
    Strong,
    Strikethrough,
    Superscript,
    Subscript,

    Link,
    Image,

    /// End of a metadata block of the given kind.
    MetadataBlock(MetadataBlockKind),

    MdxJsxFlowElement,
    MdxJsxTextElement,
}
479
/// Compile-time check that `TagEnd` is exactly two bytes in size: the declared
/// array type `[(); 2]` only matches the initializer when
/// `size_of::<TagEnd>()` evaluates to 2, so any other size is a type error.
/// This small size is why `TagEnd` is used instead of just using `Tag`.
/// The check is only compiled on targets with 64-bit pointers.
#[cfg(target_pointer_width = "64")]
const _STATIC_ASSERT_TAG_END_SIZE: [(); 2] = [(); core::mem::size_of::<TagEnd>()];
484
485impl<'a> From<Tag<'a>> for TagEnd {
486 fn from(value: Tag) -> Self {
487 value.to_end()
488 }
489}
490
/// The level of a heading, from `H1` to `H6`.
///
/// Discriminants start at 1, so `HeadingLevel::H3 as usize` is `3`.
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum HeadingLevel {
    H1 = 1,
    H2,
    H3,
    H4,
    H5,
    H6,
}

impl Display for HeadingLevel {
    /// Writes the lowercase name of the level: `h1` through `h6`.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let name = match *self {
            Self::H1 => "h1",
            Self::H2 => "h2",
            Self::H3 => "h3",
            Self::H4 => "h4",
            Self::H5 => "h5",
            Self::H6 => "h6",
        };
        // Written verbatim; width and fill flags are not applied.
        f.write_str(name)
    }
}

/// Returned when trying to convert a `usize` into a [`HeadingLevel`] but it fails
/// because the `usize` isn't a valid heading level. Holds the rejected value.
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug)]
pub struct InvalidHeadingLevel(usize);

impl TryFrom<usize> for HeadingLevel {
    type Error = InvalidHeadingLevel;

    /// Converts `1..=6` into the corresponding level.
    ///
    /// # Errors
    ///
    /// Returns [`InvalidHeadingLevel`] wrapping `value` for any other input,
    /// including `0`.
    fn try_from(value: usize) -> Result<Self, Self::Error> {
        // Indexed by `value - 1`.
        const LEVELS: [HeadingLevel; 6] = [
            HeadingLevel::H1,
            HeadingLevel::H2,
            HeadingLevel::H3,
            HeadingLevel::H4,
            HeadingLevel::H5,
            HeadingLevel::H6,
        ];

        value
            // `0` has no predecessor and therefore no level.
            .checked_sub(1)
            .and_then(|index| LEVELS.get(index))
            .copied()
            .ok_or(InvalidHeadingLevel(value))
    }
}
535
/// Type specifier for inline links. See [`Tag::Link`] for more information.
#[derive(Clone, Debug, PartialEq, Copy)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum LinkType {
    /// Inline link like `[foo](bar)`
    Inline,
    /// Reference link like `[foo][bar]`
    Reference,
    /// Reference without destination in the document, but resolved by the broken_link_callback
    ReferenceUnknown,
    /// Collapsed link like `[foo][]`
    Collapsed,
    /// Collapsed link without destination in the document, but resolved by the broken_link_callback
    CollapsedUnknown,
    /// Shortcut link like `[foo]`
    Shortcut,
    /// Shortcut without destination in the document, but resolved by the broken_link_callback
    ShortcutUnknown,
    /// Autolink like `<http://foo.bar/baz>`
    Autolink,
    /// Email address in autolink like `<john@example.org>`
    Email,
    /// Wikilink link like `[[foo]]` or `[[foo|bar]]`
    WikiLink {
        /// `true` if the wikilink was piped.
        ///
        /// * `true` - `[[foo|bar]]`
        /// * `false` - `[[foo]]`
        has_pothole: bool,
    },
}

impl LinkType {
    /// Map the link type to an equivalent _Unknown link type.
    ///
    /// Only `Reference`, `Collapsed` and `Shortcut` have such a counterpart.
    ///
    /// # Panics
    ///
    /// Panics for every other link type; callers must only pass one of the
    /// three types listed above.
    fn to_unknown(self) -> Self {
        match self {
            LinkType::Reference => LinkType::ReferenceUnknown,
            LinkType::Collapsed => LinkType::CollapsedUnknown,
            LinkType::Shortcut => LinkType::ShortcutUnknown,
            // Listed explicitly (no `_` arm) so that adding a variant forces a
            // decision here instead of silently falling into the panic.
            LinkType::Inline
            | LinkType::ReferenceUnknown
            | LinkType::CollapsedUnknown
            | LinkType::ShortcutUnknown
            | LinkType::Autolink
            | LinkType::Email
            | LinkType::WikiLink { .. } => {
                unreachable!("link type {:?} has no unknown counterpart", self)
            }
        }
    }
}
579
/// Markdown events that are generated in a preorder traversal of the document
/// tree, with additional `End` events whenever all of an inner node's children
/// have been visited.
#[derive(Clone, Debug, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum Event<'a> {
    /// Start of a tagged element. Events that are yielded after this event
    /// and before its corresponding `End` event are inside this element.
    /// Start and end events are guaranteed to be balanced.
    #[cfg_attr(feature = "serde", serde(borrow))]
    Start(Tag<'a>),
    /// End of a tagged element.
    End(TagEnd),
    /// A text node.
    ///
    /// All text, outside and inside [`Tag`]s.
    #[cfg_attr(feature = "serde", serde(borrow))]
    Text(CowStr<'a>),
    /// An [inline code node](https://spec.commonmark.org/0.31.2/#code-spans).
    ///
    /// ```markdown
    /// `code`
    /// ```
    #[cfg_attr(feature = "serde", serde(borrow))]
    Code(CowStr<'a>),
    /// An inline math environment node.
    /// Requires [`Options::ENABLE_MATH`].
    ///
    /// ```markdown
    /// $math$
    /// ```
    #[cfg_attr(feature = "serde", serde(borrow))]
    InlineMath(CowStr<'a>),
    /// A display math environment node.
    /// Requires [`Options::ENABLE_MATH`].
    ///
    /// ```markdown
    /// $$math$$
    /// ```
    #[cfg_attr(feature = "serde", serde(borrow))]
    DisplayMath(CowStr<'a>),
    /// An HTML node.
    ///
    /// A line of HTML inside [`Tag::HtmlBlock`] includes the line break.
    #[cfg_attr(feature = "serde", serde(borrow))]
    Html(CowStr<'a>),
    /// An [inline HTML node](https://spec.commonmark.org/0.31.2/#raw-html).
    ///
    /// Contains only the tag itself, e.g. `<open-tag>`, `</close-tag>` or `<!-- comment -->`.
    ///
    /// **Note**: Under some conditions HTML can also be parsed as an HTML Block, see [`Tag::HtmlBlock`] for details.
    #[cfg_attr(feature = "serde", serde(borrow))]
    InlineHtml(CowStr<'a>),
    /// A reference to a footnote with given label, defined
    /// by an event with a [`Tag::FootnoteDefinition`] tag. Definitions and references to them may
    /// occur in any order. Only parsed and emitted with [`Options::ENABLE_FOOTNOTES`].
    ///
    /// ```markdown
    /// [^1]
    /// ```
    #[cfg_attr(feature = "serde", serde(borrow))]
    FootnoteReference(CowStr<'a>),
    /// A [soft line break](https://spec.commonmark.org/0.31.2/#soft-line-breaks).
    ///
    /// Any line break that isn't a [`HardBreak`](Self::HardBreak), or the end of e.g. a paragraph.
    SoftBreak,
    /// A [hard line break](https://spec.commonmark.org/0.31.2/#hard-line-breaks).
    ///
    /// A line ending that is either preceded by at least two spaces or `\`.
    ///
    /// ```markdown
    /// hard··
    /// line\
    /// breaks
    /// ```
    /// *`·` is a space*
    HardBreak,
    /// A horizontal ruler.
    ///
    /// ```markdown
    /// ***
    /// ···---
    /// _·_··_····_··
    /// ```
    /// *`·` is any whitespace*
    Rule,
    /// A task list marker, rendered as a checkbox in HTML. Contains `true` when it is checked.
    /// Only parsed and emitted with [`Options::ENABLE_TASKLISTS`].
    /// ```markdown
    /// - [ ] unchecked
    /// - [x] checked
    /// ```
    TaskListMarker(bool),

    /// An MDX flow expression (block-level).
    /// Only parsed and emitted with [`Options::ENABLE_MDX`].
    /// ```mdx
    /// {1 + 1}
    /// ```
    #[cfg_attr(feature = "serde", serde(borrow))]
    MdxFlowExpression(CowStr<'a>),

    /// An MDX text expression (inline).
    /// Only parsed and emitted with [`Options::ENABLE_MDX`].
    /// ```mdx
    /// an {expression} here
    /// ```
    #[cfg_attr(feature = "serde", serde(borrow))]
    MdxTextExpression(CowStr<'a>),

    /// An MDX ESM block (import/export at document level).
    /// Only parsed and emitted with [`Options::ENABLE_MDX`].
    /// ```mdx
    /// import {Chart} from './chart.js'
    /// export const meta = {}
    /// ```
    #[cfg_attr(feature = "serde", serde(borrow))]
    MdxEsm(CowStr<'a>),
}
699
700impl<'a> Event<'a> {
701 pub fn into_static(self) -> Event<'static> {
702 match self {
703 Event::Start(t) => Event::Start(t.into_static()),
704 Event::End(e) => Event::End(e),
705 Event::Text(s) => Event::Text(s.into_static()),
706 Event::Code(s) => Event::Code(s.into_static()),
707 Event::InlineMath(s) => Event::InlineMath(s.into_static()),
708 Event::DisplayMath(s) => Event::DisplayMath(s.into_static()),
709 Event::Html(s) => Event::Html(s.into_static()),
710 Event::InlineHtml(s) => Event::InlineHtml(s.into_static()),
711 Event::FootnoteReference(s) => Event::FootnoteReference(s.into_static()),
712 Event::SoftBreak => Event::SoftBreak,
713 Event::HardBreak => Event::HardBreak,
714 Event::Rule => Event::Rule,
715 Event::TaskListMarker(b) => Event::TaskListMarker(b),
716 Event::MdxFlowExpression(s) => Event::MdxFlowExpression(s.into_static()),
717 Event::MdxTextExpression(s) => Event::MdxTextExpression(s.into_static()),
718 Event::MdxEsm(s) => Event::MdxEsm(s.into_static()),
719 }
720 }
721}
722
/// Table column text alignment.
///
/// One value per column is carried by [`Tag::Table`].
#[derive(Copy, Clone, Debug, PartialEq)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum Alignment {
    /// Default text alignment.
    None,
    /// Left-aligned column.
    Left,
    /// Centered column.
    Center,
    /// Right-aligned column.
    Right,
}
733
bitflags::bitflags! {
    /// Option struct containing flags for enabling extra features
    /// that are not part of the CommonMark spec.
    #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
    pub struct Options: u32 {
        /// Tables; required for [`Tag::Table`] and its head/row/cell tags.
        const ENABLE_TABLES = 1 << 1;
        /// GitHub-compatible footnote syntax.
        ///
        /// Footnotes are referenced with the syntax `[^IDENT]`,
        /// and defined with an identifier followed by a colon at top level.
        ///
        /// ---
        ///
        /// ```markdown
        /// Footnote referenced [^1].
        ///
        /// [^1]: footnote defined
        /// ```
        ///
        /// Footnote referenced [^1].
        ///
        /// [^1]: footnote defined
        const ENABLE_FOOTNOTES = 1 << 2;
        /// Strikethrough; required for [`Tag::Strikethrough`].
        const ENABLE_STRIKETHROUGH = 1 << 3;
        /// Task lists; required for [`Event::TaskListMarker`].
        const ENABLE_TASKLISTS = 1 << 4;
        /// Enables replacement of ASCII punctuation characters with
        /// Unicode ligatures and smart quotes.
        ///
        /// This includes replacing `--` with `–`, `---` with `—`, `...` with `…`,
        /// `"quote"` with `\u{201c}quote\u{201d}`, and `'quote'` with `\u{2018}quote\u{2019}`.
        ///
        /// Equivalent to enabling all of `ENABLE_SMART_QUOTES`,
        /// `ENABLE_SMART_DASHES`, and `ENABLE_SMART_ELLIPSES`.
        const ENABLE_SMART_PUNCTUATION = 1 << 5;
        /// Replace straight quotes (`"`, `'`) with curly/smart quotes.
        const ENABLE_SMART_QUOTES = 1 << 18;
        /// Replace `--` with en-dash and `---` with em-dash.
        const ENABLE_SMART_DASHES = 1 << 19;
        /// Replace `...` with ellipsis (`…`).
        const ENABLE_SMART_ELLIPSES = 1 << 20;
        /// Extension to allow headings to have ID and classes.
        ///
        /// `# text { #id .class1 .class2 myattr other_attr=myvalue }`
        /// is interpreted as a level 1 heading
        /// with the content `text`, ID `id`, classes `class1` and `class2` and
        /// custom attributes `myattr` (without value) and
        /// `other_attr` with value `myvalue`.
        /// Note that ID, classes, and custom attributes should be space-separated.
        const ENABLE_HEADING_ATTRIBUTES = 1 << 6;
        /// Metadata blocks in YAML style, i.e.:
        /// - starting with a `---` line
        /// - ending with a `---` or `...` line
        const ENABLE_YAML_STYLE_METADATA_BLOCKS = 1 << 7;
        /// Metadata blocks delimited by:
        /// - `+++` line at start
        /// - `+++` line at end
        const ENABLE_PLUSES_DELIMITED_METADATA_BLOCKS = 1 << 8;
        /// With this feature enabled, two events `Event::InlineMath` and `Event::DisplayMath`
        /// are emitted that conventionally contain TeX formulas.
        const ENABLE_MATH = 1 << 10;
        /// Misc GitHub Flavored Markdown features not supported in CommonMark.
        const ENABLE_GFM = 1 << 11;
        /// GitHub-style blockquote alerts (`[!NOTE]`, `[!TIP]`, `[!IMPORTANT]`, `[!WARNING]`, `[!CAUTION]`).
        /// Not part of the GFM spec — this is a GitHub-specific feature.
        const ENABLE_GITHUB_ALERTS = 1 << 21;
        /// Commonmark-HS-Extensions compatible definition lists.
        ///
        /// ```markdown
        /// title 1
        ///   : definition 1
        ///
        /// title 2
        ///   : definition 2a
        ///   : definition 2b
        /// ```
        const ENABLE_DEFINITION_LIST = 1 << 12;
        /// Superscript (`^text^`); required for [`Tag::Superscript`].
        const ENABLE_SUPERSCRIPT = 1 << 13;
        /// Subscript (`~text~`); required for [`Tag::Subscript`].
        const ENABLE_SUBSCRIPT = 1 << 14;
        /// Obsidian-style Wikilinks.
        const ENABLE_WIKILINKS = 1 << 15;
        /// Directives: container (:::), leaf (::), and text (:) directives.
        const ENABLE_DIRECTIVE = 1 << 16;
        /// MDX: enables JSX elements, expressions, and ESM import/export.
        const ENABLE_MDX = 1 << 17;
    }
}
820
821impl Options {
822 pub(crate) fn has_smart_quotes(&self) -> bool {
823 self.contains(Options::ENABLE_SMART_PUNCTUATION)
824 || self.contains(Options::ENABLE_SMART_QUOTES)
825 }
826
827 pub(crate) fn has_smart_dashes(&self) -> bool {
828 self.contains(Options::ENABLE_SMART_PUNCTUATION)
829 || self.contains(Options::ENABLE_SMART_DASHES)
830 }
831
832 pub(crate) fn has_smart_ellipses(&self) -> bool {
833 self.contains(Options::ENABLE_SMART_PUNCTUATION)
834 || self.contains(Options::ENABLE_SMART_ELLIPSES)
835 }
836}