markdown_ast/lib.rs
1//! Parse a Markdown input string into a sequence of Markdown abstract syntax
2//! tree [`Block`]s.
3//!
4//! This crate is intentionally designed to interoperate well with the
5//! [`pulldown-cmark`](https://crates.io/crates/pulldown-cmark) crate and the
6//! ecosystem around it. See [Motivation and relation to pulldown-cmark](#motivation-and-relation-to-pulldown-cmark)
7//! for more information.
8//!
9//! The AST types are designed to align with the structure defined
10//! by the [CommonMark Specification](https://spec.commonmark.org/).
11//!
12//! # Quick Examples
13//!
14//! Parse simple Markdown into an AST:
15//!
16//! ```
17//! use markdown_ast::{markdown_to_ast, Block, Inline, Inlines};
18//! # use pretty_assertions::assert_eq;
19//!
20//! let ast = markdown_to_ast("
21//! Hello! This is a paragraph **with bold text**.
22//! ");
23//!
24//! assert_eq!(ast, vec![
25//! Block::Paragraph(Inlines(vec![
26//! Inline::Text("Hello! This is a paragraph ".to_owned()),
27//! Inline::Strong(Inlines(vec![
28//! Inline::Text("with bold text".to_owned()),
29//! ])),
30//! Inline::Text(".".to_owned())
31//! ]))
32//! ]);
33//! ```
34//!
35//!
36//!
37//! # API Overview
38//!
39//! | Function | Input | Output |
40//! |------------------------------------|------------|--------------|
41//! | [`markdown_to_ast()`] | `&str` | `Vec<Block>` |
42//! | [`ast_to_markdown()`] | `&[Block]` | `String` |
43//! | [`ast_to_events()`] | `&[Block]` | `Vec<Event>` |
44//! | [`events_to_ast()`] | `&[Event]` | `Vec<Block>` |
45//! | [`events_to_markdown()`] | `&[Event]` | `String` |
46//! | [`markdown_to_events()`] | `&str` | `Vec<Event>` |
47//! | [`canonicalize()`] | `&str` | `String` |
48//!
49//! ##### Terminology
50//!
51//! This crate is able to process and manipulate Markdown in three different
52//! representations:
53//!
54//! | Term | Type | Description |
55//! |----------|----------------------|-------------------------------------|
56//! | Markdown | `String` | Raw Markdown source / output string |
57//! | Events | `&[Event]` | Markdown parsed by [`pulldown-cmark`](https://crates.io/crates/pulldown-cmark) into a flat sequence of parser [`Event`]s |
58//! | AST | `Block` / `&[Block]` | Markdown parsed by `markdown-ast` into a hierarchical structure of [`Block`]s |
59//!
60//! ##### Processing Steps
61//!
62//! ```text
63//! String => Events => Blocks => Events => String
64//! |_____ A ______| |______ C _____|
65//! |______ B _____| |______ D _____|
66//! |__________ E ___________|
67//! |___________ F __________|
68//! |____________________ G _____________________|
69//! ```
70//!
71//! - **A** — [`markdown_to_events()`]
72//! - **B** — [`events_to_ast()`]
73//! - **C** — [`ast_to_events()`]
74//! - **D** — [`events_to_markdown()`]
75//! - **E** — [`markdown_to_ast()`]
76//! - **F** — [`ast_to_markdown()`]
77//! - **G** — [`canonicalize()`]
78//!
79//! Note: **A** wraps [`pulldown_cmark::Parser`], and **D** wraps
80//! [`pulldown_cmark_to_cmark::cmark_with_options()`].
81//!
82//!
83//!
84//! # Detailed Examples
85//!
86//! #### Parse varied Markdown to an AST representation:
87//!
88//! ```
89//! use markdown_ast::{
90//! markdown_to_ast, Block, HeadingLevel, Inline, Inlines, ListItem
91//! };
92//! # use pretty_assertions::assert_eq;
93//!
94//! let ast = markdown_to_ast("
95//! ## An Example Document
96//!
97//! This is a paragraph that
98//! is split across *multiple* lines.
99//!
100//! * This is a list item
101//! ");
102//!
103//! assert_eq!(ast, vec![
104//! Block::Heading(
105//! HeadingLevel::H1,
106//! Inlines(vec![
107//! Inline::Text("An Example Document".to_owned())
108//! ])
109//! ),
110//! Block::Paragraph(Inlines(vec![
111//! Inline::Text("This is a paragraph that".to_owned()),
112//! Inline::SoftBreak,
113//! Inline::Text("is split across ".to_owned()),
114//! Inline::Emphasis(Inlines(vec![
115//! Inline::Text("multiple".to_owned()),
116//! ])),
117//! Inline::Text(" lines.".to_owned()),
118//! ])),
119//! Block::List(vec![
120//! ListItem(vec![
121//! Block::Paragraph(Inlines(vec![
122//! Inline::Text("This is a list item".to_owned())
123//! ]))
124//! ])
125//! ])
126//! ]);
127//! ```
128//!
129//! #### Synthesize Markdown using programmatic construction of the document:
130//!
131//! *Note:* This is a more user friendly alternative to a "string builder"
132//! approach where the raw Markdown string is constructed piece by piece,
133//! which suffers from extra bookkeeping that must be done to manage things like
134//! indent level and soft vs hard breaks.
135//!
136//! ```
137//! use markdown_ast::{
138//! ast_to_markdown, Block, Inline, Inlines, ListItem,
139//! HeadingLevel,
140//! };
141//! # use pretty_assertions::assert_eq;
142//!
143//! let tech_companies = vec![
144//! ("Apple", 1976, 164_000),
145//! ("Microsoft", 1975, 221_000),
146//! ("Nvidia", 1993, 29_600),
147//! ];
148//!
149//! let ast = vec![
150//! Block::Heading(HeadingLevel::H1, Inlines::plain_text("Tech Companies")),
151//! Block::plain_text_paragraph("The following are major tech companies:"),
152//! Block::List(Vec::from_iter(
153//! tech_companies
154//! .into_iter()
155//! .map(|(company_name, founded, employee_count)| {
156//! ListItem(vec![
157//! Block::paragraph(vec![Inline::plain_text(company_name)]),
158//! Block::List(vec![
159//! ListItem::plain_text(format!("Founded: {founded}")),
160//! ListItem::plain_text(format!("Employee count: {employee_count}"))
161//! ])
162//! ])
163//! })
164//! ))
165//! ];
166//!
167//! let markdown: String = ast_to_markdown(&ast);
168//!
169//! assert_eq!(markdown, "\
170//! ## Tech Companies
171//!
172//! The following are major tech companies:
173//!
174//! * Apple
175//!
176//! * Founded: 1976
177//!
178//! * Employee count: 164000
179//!
180//! * Microsoft
181//!
182//! * Founded: 1975
183//!
184//! * Employee count: 221000
185//!
186//! * Nvidia
187//!
188//! * Founded: 1993
189//!
190//! * Employee count: 29600\
191//! ");
192//!
193//! ```
194//!
195//! # Known Issues
196//!
197//! Currently `markdown-ast` does not escape Markdown content appearing in
198//! leaf inline text:
199//!
200//! ```
201//! use markdown_ast::{ast_to_markdown, Block};
202//!
203//! let ast = vec![
204//! Block::plain_text_paragraph("In the equation a*b*c ...")
205//! ];
206//!
207//! let markdown = ast_to_markdown(&ast);
208//!
209//! assert_eq!(markdown, "In the equation a*b*c ...");
210//! ```
211//!
212//! which will render as:
213//!
214//! > In the equation a*b*c ...
215//!
216//! with the asterisks interpreted as emphasis formatting markers, contrary to
217//! the intention of the author.
218//!
219//! Fixing this robustly will require either:
220//!
221//! * Adding automatic escaping of Markdown characters in [`Inline::Text`]
222//! during rendering (not ideal)
223//!
224//! * Adding pre-construction validation checks for [`Inline::Text`] that
225//! prevent constructing an `Inline` with Markdown formatting characters that
226//! have not been escaped correctly by the user.
227//!
228//! In either case, fixing this bug will be considered a **semver exempt**
229//! change in behavior to `markdown-ast`.
230//!
231//! # Motivation and relation to `pulldown-cmark`
232//!
233//! [`pulldown-cmark`](https://crates.io/crates/pulldown-cmark) is a popular
234//! Markdown parser crate. It provides a streaming event (pull parsing) based
235//! representation of a Markdown document. That representation is useful for
236//! efficient transformation of a Markdown document into another format, often
237//! HTML.
238//!
239//! However, a streaming parser representation is less amenable to programmatic
240//! construction or human-understandable transformations of Markdown documents.
241//!
242//! `markdown-ast` provides an abstract syntax tree (AST) representation of
243//! Markdown that is easy to construct and work with.
244//!
245//! Additionally, `pulldown-cmark` is widely used in the Rust crate ecosystem,
246//! for example for [`mdbook`](https://crates.io/crates/mdbook) extensions.
247//! Interoperability with `pulldown-cmark` is an intentional design choice for
248//! the usability of `markdown-ast`; one could imagine `markdown-ast` instead
249//! abstracting over the underlying parser implementation, but my view is that
250//! would limit the utility of `markdown-ast`.
251//!
252
253mod unflatten;
254
255mod from_events;
256mod to_events;
257
258/// Ensure that doc tests in the README.md file get run.
259///
260/// See: <https://connorgray.com/reference/creating-a-new-rust-crate#test-readmemd-examples>
261mod test_readme {
262 #![doc = include_str!("../README.md")]
263}
264
265use pulldown_cmark::{self as md, CowStr, Event};
266
267pub use pulldown_cmark::HeadingLevel;
268
269//======================================
270// AST Representation
271//======================================
272
273/// A piece of structural Markdown content.
274/// (CommonMark: [blocks](https://spec.commonmark.org/0.30/#blocks),
275/// [container blocks](https://spec.commonmark.org/0.30/#container-blocks))
276#[derive(Debug, Clone, PartialEq)]
277pub enum Block {
278 /// CommonMark: [paragraphs](https://spec.commonmark.org/0.30/#paragraphs)
279 Paragraph(Inlines),
280 /// CommonMark: [lists](https://spec.commonmark.org/0.30/#lists)
281 List(Vec<ListItem>),
282 /// CommonMark: [ATX heading](https://spec.commonmark.org/0.30/#atx-heading)
283 Heading(HeadingLevel, Inlines),
284 /// An indented or fenced code block.
285 ///
286 /// CommonMark: [indented code blocks](https://spec.commonmark.org/0.30/#indented-code-blocks),
287 /// [fenced code blocks](https://spec.commonmark.org/0.30/#fenced-code-blocks)
288 CodeBlock {
289 /// Indicates whether this is a fenced or indented code block.
290 ///
291 /// If this `CodeBlock` is a fenced code block, this contains its info
292 /// string.
293 ///
294 /// CommonMark: [info string](https://spec.commonmark.org/0.30/#info-string)
295 kind: CodeBlockKind,
296 code: String,
297 },
298 /// CommonMark: [block quotes](https://spec.commonmark.org/0.30/#block-quotes)
299 BlockQuote {
300 // TODO: Document
301 kind: Option<md::BlockQuoteKind>,
302 blocks: Vec<Block>,
303 },
304 Table {
305 alignments: Vec<md::Alignment>,
306 headers: Vec<Inlines>,
307 rows: Vec<Vec<Inlines>>,
308 },
309 /// CommonMark: [thematic breaks](https://spec.commonmark.org/0.30/#thematic-breaks)
310 Rule,
311}
312
313/// A sequence of [`Inline`]s.
314/// (CommonMark: [inlines](https://spec.commonmark.org/0.30/#inlines))
315#[derive(Debug, Clone, PartialEq)]
316pub struct Inlines(pub Vec<Inline>);
317
318/// An item in a list. (CommonMark: [list items](https://spec.commonmark.org/0.30/#list-items))
319#[derive(Debug, Clone, PartialEq)]
320pub struct ListItem(pub Vec<Block>);
321
322/// An inline piece of atomic Markdown content.
323/// (CommonMark: [inlines](https://spec.commonmark.org/0.30/#inlines))
324#[derive(Debug, Clone, PartialEq)]
325pub enum Inline {
326 Text(String),
327 /// CommonMark: [emphasis](https://spec.commonmark.org/0.30/#emphasis-and-strong-emphasis)
328 Emphasis(Inlines),
329 /// CommonMark: [strong emphasis](https://spec.commonmark.org/0.30/#emphasis-and-strong-emphasis)
330 Strong(Inlines),
331 /// Strikethrough styled text. (Non-standard.)
332 Strikethrough(Inlines),
333 /// CommonMark: [code spans](https://spec.commonmark.org/0.30/#code-spans)
334 Code(String),
335 /// CommonMark: [links](https://spec.commonmark.org/0.30/#links)
336 // TODO:
337 // Document every type of Inline::Link value and what its equivalent source
338 // is.
339 Link {
340 link_type: md::LinkType,
341 /// CommonMark: [link destination](https://spec.commonmark.org/0.30/#link-destination)
342 dest_url: String,
343 /// CommonMark: [link title](https://spec.commonmark.org/0.30/#link-title)
344 title: String,
345 /// CommonMark: [link label](https://spec.commonmark.org/0.30/#link-label)
346 id: String,
347 /// CommonMark: [link text](https://spec.commonmark.org/0.30/#link-text)
348 content_text: Inlines,
349 },
350 /// CommonMark: [soft line breaks](https://spec.commonmark.org/0.30/#soft-line-breaks)
351 SoftBreak,
352 /// CommonMark: [hard line breaks](https://spec.commonmark.org/0.30/#hard-line-breaks)
353 HardBreak,
354}
355
/// Distinguishes the two forms a [`Block::CodeBlock`] can take.
///
/// Both variants hold only owned, hashable data, so the type implements `Eq`
/// and `Hash` in addition to `PartialEq`; this lets values be used as map or
/// set keys.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum CodeBlockKind {
    /// A fenced code block; the payload is the block's info string.
    ///
    /// CommonMark: [info string](https://spec.commonmark.org/0.30/#info-string)
    Fenced(String),
    /// An indented code block, which has no info string.
    Indented,
}
361
362//======================================
363// Public API Functions
364//======================================
365
366/// Parse Markdown input string into AST [`Block`]s.
367pub fn markdown_to_ast(input: &str) -> Vec<Block> {
368 /* For Markdown parsing debugging.
369 {
370 let mut options = md::Options::empty();
371 options.insert(md::Options::ENABLE_STRIKETHROUGH);
372 let parser = md::Parser::new_ext(input, options);
373
374 let events: Vec<_> = parser.into_iter().collect();
375
376 println!("==== All events =====\n");
377 for event in &events {
378 println!("{event:?}");
379 }
380 println!("\n=====================\n");
381
382 println!("==== Unflattened events =====\n");
383 for event in unflatten::parse_markdown_to_unflattened_events(input) {
384 println!("{event:#?}")
385 }
386 println!("=============================\n");
387 }
388 */
389
390 let events = markdown_to_events(input);
391
392 return events_to_ast(events);
393}
394
395/// Convert AST [`Block`]s into a Markdown string.
396pub fn ast_to_markdown(blocks: &[Block]) -> String {
397 let events = ast_to_events(blocks);
398
399 return events_to_markdown(events);
400}
401
402/// Convert [`Event`]s into a Markdown string.
403///
404/// This is a thin wrapper around
405/// [`pulldown_cmark_to_cmark::cmark_with_options`], provided in this crate for
406/// consistency and ease of use.
407pub fn events_to_markdown<'e, I: IntoIterator<Item = Event<'e>>>(
408 events: I,
409) -> String {
410 let mut string = String::new();
411
412 let options = default_to_markdown_options();
413
414 let _: pulldown_cmark_to_cmark::State =
415 pulldown_cmark_to_cmark::cmark_with_options(
416 events.into_iter(),
417 &mut string,
418 options,
419 )
420 .expect("error converting Event sequent to Markdown string");
421
422 string
423}
424
425/// Convert AST [`Block`]s into an [`Event`] sequence.
426pub fn ast_to_events(blocks: &[Block]) -> Vec<Event> {
427 let mut events: Vec<Event> = Vec::new();
428
429 for block in blocks {
430 let events = &mut events;
431
432 crate::to_events::block_to_events(&block, events);
433 }
434
435 events
436}
437
438/// Parse [`Event`]s into AST [`Block`]s.
439pub fn events_to_ast<'i, I: IntoIterator<Item = Event<'i>>>(
440 events: I,
441) -> Vec<Block> {
442 let events =
443 unflatten::parse_markdown_to_unflattened_events(events.into_iter());
444
445 crate::from_events::ast_events_to_ast(events)
446}
447
448/// Parse Markdown input string into [`Event`]s.
449///
450/// This is a thin wrapper around [`pulldown_cmark::Parser`], provided in this
451/// crate for consistency and ease of use.
452pub fn markdown_to_events<'i>(
453 input: &'i str,
454) -> impl Iterator<Item = Event<'i>> {
455 // Set up options and parser. Strikethroughs are not part of the CommonMark standard
456 // and we therefore must enable it explicitly.
457 let mut options = md::Options::empty();
458 options.insert(md::Options::ENABLE_STRIKETHROUGH);
459 options.insert(md::Options::ENABLE_TABLES);
460 md::Parser::new_ext(input, options)
461}
462
463/// Canonicalize (or format) a Markdown input by parsing and then converting
464/// back to a string.
465///
466/// **⚠️ Warning ⚠️:** This function is **semver exempt**. The precise
467/// canonicalization behavior may change in MINOR or PATCH versions of
468/// markdown-ast. (Stabilizing the behavior of this function will require
469/// additional options to configure the behavior of
470/// [pulldown-cmark-to-cmark](https://crates.io/crates/pulldown-cmark-to-cmark).)
471///
472/// # Examples
473///
474/// List items using `-` (minus) are canonicalized to the `*` (asterisk) list
475/// marker type:
476///
477/// ```
478/// use markdown_ast::canonicalize;
479/// assert_eq!(
480/// canonicalize("\
481/// - Foo
482/// - Bar
483/// "),
484/// "\
485/// * Foo
486///
487/// * Bar"
488/// )
489/// ```
490///
491/// Hard breaks ending in backslash are canonicalized to the "two spaces at the
492/// end of the line" form:
493///
494/// ```
495/// use markdown_ast::canonicalize;
496/// assert_eq!(
497/// canonicalize(r#"
498/// This ends in a hard break.\
499/// This is a new line."#),
500/// // Note: The two spaces at the end of the first line below may not be
501/// // visible, but they're there.
502/// "\
503/// This ends in a hard break.
504/// This is a new line."
505/// )
506/// ```
507pub fn canonicalize(input: &str) -> String {
508 let ast = markdown_to_ast(input);
509
510 return ast_to_markdown(&ast);
511}
512
513fn default_to_markdown_options() -> pulldown_cmark_to_cmark::Options<'static> {
514 pulldown_cmark_to_cmark::Options {
515 // newlines_after_paragraph: 2,
516 // newlines_after_headline: 0,
517 // newlines_after_codeblock: 0,
518 // newlines_after_list: 1,
519 // newlines_after_rest: 0,
520 code_block_token_count: 3,
521 ..pulldown_cmark_to_cmark::Options::default()
522 }
523}
524
525//======================================
526// Impls
527//======================================
528
529impl Inline {
530 /// Construct a inline containing a piece of plain text.
531 pub fn plain_text<S: Into<String>>(s: S) -> Self {
532 Inline::Text(s.into())
533 }
534
535 pub fn emphasis(inline: Inline) -> Self {
536 Inline::Emphasis(Inlines(vec![inline]))
537 }
538
539 pub fn strong(inline: Inline) -> Self {
540 Inline::Strong(Inlines(vec![inline]))
541 }
542
543 pub fn strikethrough(inline: Inline) -> Self {
544 Inline::Strikethrough(Inlines(vec![inline]))
545 }
546
547 pub fn code<S: Into<String>>(s: S) -> Self {
548 Inline::Code(s.into())
549 }
550}
551
552impl Inlines {
553 /// Construct an inlines sequence containing a single inline piece of plain
554 /// text.
555 pub fn plain_text<S: Into<String>>(inline: S) -> Self {
556 return Inlines(vec![Inline::Text(inline.into())]);
557 }
558}
559
560impl Block {
561 /// Construct a paragraph block containing a single inline piece of plain
562 /// text.
563 pub fn plain_text_paragraph<S: Into<String>>(inline: S) -> Self {
564 return Block::Paragraph(Inlines(vec![Inline::Text(inline.into())]));
565 }
566
567 pub fn paragraph(text: Vec<Inline>) -> Block {
568 Block::Paragraph(Inlines(text))
569 }
570}
571
572impl ListItem {
573 /// Construct a list item containing a single inline piece of plain text.
574 pub fn plain_text<S: Into<String>>(inline: S) -> Self {
575 return ListItem(vec![Block::Paragraph(Inlines(vec![Inline::Text(
576 inline.into(),
577 )]))]);
578 }
579}
580
581impl CodeBlockKind {
582 pub fn info_string(&self) -> Option<&str> {
583 match self {
584 CodeBlockKind::Fenced(info_string) => Some(info_string.as_str()),
585 CodeBlockKind::Indented => None,
586 }
587 }
588
589 pub(crate) fn from_pulldown_cmark(kind: md::CodeBlockKind) -> Self {
590 match kind {
591 md::CodeBlockKind::Indented => CodeBlockKind::Indented,
592 md::CodeBlockKind::Fenced(info_string) => {
593 CodeBlockKind::Fenced(info_string.to_string())
594 },
595 }
596 }
597
598 pub(crate) fn to_pulldown_cmark<'s>(&'s self) -> md::CodeBlockKind<'s> {
599 match self {
600 CodeBlockKind::Fenced(info) => {
601 md::CodeBlockKind::Fenced(CowStr::from(info.as_str()))
602 },
603 CodeBlockKind::Indented => md::CodeBlockKind::Indented,
604 }
605 }
606}
607
608impl IntoIterator for Inlines {
609 type Item = Inline;
610 type IntoIter = std::vec::IntoIter<Inline>;
611
612 fn into_iter(self) -> Self::IntoIter {
613 let Inlines(vec) = self;
614 vec.into_iter()
615 }
616}
617
618//======================================
619// Tests: Markdown to AST parsing
620//======================================
621
/// Exercises `markdown_to_ast()` on plain text, styled text, flat lists and
/// nested list structures, comparing each result with a hand-built `Block`
/// tree. One case additionally compares `markdown_to_events()` against
/// `ast_to_events()` for the same document.
622#[test]
623fn test_markdown_to_ast() {
624 use indoc::indoc;
625 use pretty_assertions::assert_eq;
626
 // Baseline: a bare word becomes one paragraph holding one text inline.
627 assert_eq!(
628 markdown_to_ast("hello"),
629 vec![Block::paragraph(vec![Inline::Text("hello".into())])]
630 );
631
632 //--------------
633 // Styled text
634 //--------------
635
636 assert_eq!(
637 markdown_to_ast("*hello*"),
638 vec![Block::paragraph(vec![Inline::emphasis(Inline::Text(
639 "hello".into()
640 ))])]
641 );
642
643 assert_eq!(
644 markdown_to_ast("**hello**"),
645 vec![Block::paragraph(vec![Inline::strong(Inline::Text(
646 "hello".into()
647 ))])]
648 );
649
650 assert_eq!(
651 markdown_to_ast("~~hello~~"),
652 vec![Block::paragraph(vec![Inline::strikethrough(Inline::Text(
653 "hello".into()
654 ))])]
655 );
656
 // Styling wrapped around an inline code span.
657 assert_eq!(
658 markdown_to_ast("**`strong code`**"),
659 vec![Block::paragraph(vec![Inline::strong(Inline::Code(
660 "strong code".into()
661 ))])]
662 );
663
664 assert_eq!(
665 markdown_to_ast("~~`foo`~~"),
666 vec![Block::paragraph(vec![Inline::strikethrough(Inline::Code(
667 "foo".into()
668 ))])]
669 );
670
 // Styling wrapped around an inline link; title and id are expected empty.
671 assert_eq!(
672 markdown_to_ast("**[example](example.com)**"),
673 vec![Block::paragraph(vec![Inline::strong(Inline::Link {
674 link_type: md::LinkType::Inline,
675 dest_url: "example.com".into(),
676 title: String::new(),
677 id: String::new(),
678 content_text: Inlines(vec![Inline::Text("example".into())]),
679 })])]
680 );
681
682 // Test composition of emphasis, strong, strikethrough and code
683 assert_eq!(
684 markdown_to_ast("_~~**`foo`**~~_"),
685 vec![Block::paragraph(vec![Inline::emphasis(
686 Inline::strikethrough(Inline::strong(Inline::Code("foo".into())))
687 )])]
688 );
689
690 //--------------
691 // Lists
692 //--------------
693
694 assert_eq!(
695 markdown_to_ast("* hello"),
696 vec![Block::List(vec![ListItem(vec![Block::paragraph(vec![
697 Inline::Text("hello".into())
698 ])])])]
699 );
700
701 // List items with styled text
702
703 assert_eq!(
704 markdown_to_ast("* *hello*"),
705 vec![Block::List(vec![ListItem(vec![Block::paragraph(vec![
706 Inline::emphasis(Inline::Text("hello".into()))
707 ])])])]
708 );
709
710 assert_eq!(
711 markdown_to_ast("* **hello**"),
712 vec![Block::List(vec![ListItem(vec![Block::paragraph(vec![
713 Inline::strong(Inline::Text("hello".into()))
714 ])])])]
715 );
716
717 assert_eq!(
718 markdown_to_ast("* ~~hello~~"),
719 vec![Block::List(vec![ListItem(vec![Block::paragraph(vec![
720 Inline::strikethrough(Inline::Text("hello".into()),)
721 ])])])]
722 );
723
724 //----------------------------------
725
 // A three-level nested list. The expected tree below nests a `Block::List`
 // inside each parent `ListItem`.
 // NOTE(review): the nesting depends on the exact indentation inside this
 // string literal — confirm the whitespace is intact before editing it.
726 let input = "\
727* And **bold** text.
728
729 * With nested list items.
730
731 * `md2nb` supports nested lists up to three levels deep.
732";
733
734 let ast = vec![Block::List(vec![ListItem(vec![
735 Block::paragraph(vec![
736 Inline::plain_text("And "),
737 Inline::strong(Inline::plain_text("bold")),
738 Inline::plain_text(" text."),
739 ]),
740 Block::List(vec![ListItem(vec![
741 Block::paragraph(vec![Inline::plain_text(
742 "With nested list items.",
743 )]),
744 Block::List(vec![ListItem(vec![Block::paragraph(vec![
745 Inline::code("md2nb"),
746 Inline::plain_text(
747 " supports nested lists up to three levels deep.",
748 ),
749 ])])]),
750 ])]),
751 ])])];
752
753 assert_eq!(markdown_to_ast(input), ast);
754
755 // Sanity check conversion to event stream.
756 assert_eq!(
757 markdown_to_events(input).collect::<Vec<_>>(),
758 ast_to_events(&ast)
759 );
760
761 //----------------------------------
762 // Test structures
763 //----------------------------------
764
 // NOTE(review): every `indoc!` input from here on relies on the relative
 // indentation of its lines to express list nesting and continuation
 // paragraphs — verify the whitespace against the expected trees.
 // Two paragraphs inside a single list item.
765 assert_eq!(
766 markdown_to_ast(indoc!(
767 "
768 * hello
769
770 world
771 "
772 )),
773 vec![Block::List(vec![ListItem(vec![
774 Block::paragraph(vec![Inline::Text("hello".into())]),
775 Block::paragraph(vec![Inline::Text("world".into())])
776 ])])]
777 );
778
 // A heading followed by a nested list whose inner item holds two
 // paragraphs and a further nested list.
779 #[rustfmt::skip]
780 assert_eq!(
781 markdown_to_ast(indoc!(
782 "
783 # Example
784
785 * A
786 - A.A
787
788 hello world
789
790 * *A.A.A*
791 "
792 )),
793 vec![
794 Block::Heading(
795 HeadingLevel::H1,
796 Inlines(vec![Inline::Text("Example".into())])
797 ),
798 Block::List(vec![
799 ListItem(vec![
800 Block::paragraph(vec![Inline::Text("A".into())]),
801 Block::List(vec![
802 ListItem(vec![
803 Block::paragraph(vec![Inline::Text("A.A".into())]),
804 Block::paragraph(vec![Inline::Text("hello world".into())]),
805 Block::List(vec![
806 ListItem(vec![
807 Block::paragraph(vec![
808 Inline::emphasis(
809 Inline::Text(
810 "A.A.A".into()),
811 )
812 ])
813 ])
814 ])
815 ])
816 ])
817 ])
818 ])
819 ]
820 );
821
 // Sibling items A.A, A.B and A.C share one inner list; only A.A has a
 // nested child list.
822 #[rustfmt::skip]
823 assert_eq!(
824 markdown_to_ast(indoc!(
825 "
826 * A
827 - A.A
828 * A.A.A
829 - A.B
830 - A.C
831 "
832 )),
833 vec![
834 Block::List(vec![
835 ListItem(vec![
836 Block::paragraph(vec![Inline::Text("A".into())]),
837 Block::List(vec![
838 ListItem(vec![
839 Block::paragraph(vec![Inline::Text("A.A".into())]),
840 Block::List(vec![ListItem(vec![
841 Block::paragraph(vec![Inline::Text("A.A.A".into())]),
842 ])])
843 ]),
844 ListItem(vec![
845 Block::paragraph(vec![Inline::Text("A.B".into())]),
846 ]),
847 ListItem(vec![
848 Block::paragraph(vec![Inline::Text("A.C".into())]),
849 ])
850 ])
851 ])
852 ])
853 ]
854 );
855
 // Here A.C uses a different list marker than A.A and A.B, and the expected
 // tree places it in a second, separate inner `Block::List`.
856 #[rustfmt::skip]
857 assert_eq!(
858 markdown_to_ast(indoc!(
859 "
860 # Example
861
862 * A
863 - A.A
864 - A.B
865 * A.C
866 "
867 )),
868 vec![
869 Block::Heading(
870 HeadingLevel::H1,
871 Inlines(vec![Inline::Text("Example".into())])
872 ),
873 Block::List(vec![
874 ListItem(vec![
875 Block::paragraph(vec![Inline::Text("A".into())]),
876 Block::List(vec![
877 ListItem(vec![
878 Block::paragraph(vec![Inline::Text("A.A".into())]),
879 ]),
880 ListItem(vec![
881 Block::paragraph(vec![Inline::Text("A.B".into())]),
882 ]),
883 ]),
884 Block::List(vec![
885 ListItem(vec![
886 Block::paragraph(vec![Inline::Text("A.C".into())])
887 ])
888 ]),
889 ]),
890 ])
891 ]
892 );
893
 // A continuation paragraph is expected to attach to item A.B.
894 #[rustfmt::skip]
895 assert_eq!(
896 markdown_to_ast(indoc!(
897 "
898 * A
899 - A.A
900 - A.B
901
902 separate paragraph
903
904 - A.C
905 "
906 )),
907 vec![
908 Block::List(vec![
909 ListItem(vec![
910 Block::paragraph(vec![Inline::Text("A".into())]),
911 Block::List(vec![
912 ListItem(vec![
913 Block::paragraph(vec![Inline::Text("A.A".into())]),
914 ]),
915 ListItem(vec![
916 Block::paragraph(vec![Inline::Text("A.B".into())]),
917 Block::paragraph(vec![Inline::Text("separate paragraph".into())]),
918 ]),
919 ListItem(vec![
920 Block::paragraph(vec![Inline::Text("A.C".into())]),
921 ])
922 ])
923 ])
924 ])
925 ]
926 );
927
 // Combines the cases above: a heading, a soft break inside a deeply nested
 // item, and a continuation paragraph on a sibling item.
928 #[rustfmt::skip]
929 assert_eq!(
930 markdown_to_ast(indoc!(
931 "
932 # Example
933
934 * A
935 - A.A
936 * A.A.A
937 **soft break**
938
939 - A.B
940
941 separate paragraph
942
943 - A.C
944 "
945 )),
946 vec![
947 Block::Heading(
948 HeadingLevel::H1,
949 Inlines(vec![Inline::Text("Example".into())])
950 ),
951 Block::List(vec![
952 ListItem(vec![
953 Block::paragraph(vec![Inline::Text("A".into())]),
954 Block::List(vec![
955 ListItem(vec![
956 Block::paragraph(vec![Inline::Text("A.A".into())]),
957 Block::List(vec![
958 ListItem(vec![
959 Block::paragraph(vec![
960 Inline::Text("A.A.A".into()),
961 Inline::SoftBreak,
962 Inline::strong(
963 Inline::Text("soft break".into()),
964 )
965 ]),
966 ])
967 ]),
968 ]),
969 ListItem(vec![
970 Block::paragraph(vec![Inline::Text("A.B".into())]),
971 Block::paragraph(vec![Inline::Text("separate paragraph".into())]),
972 ]),
973 ListItem(vec![
974 Block::paragraph(vec![Inline::Text("A.C".into())]),
975 ]),
976 ])
977 ])
978 ])
979 ]
980 );
981}
982
983//======================================
984// Tests: AST to Markdown string
985//======================================
986
/// Checks that `ast_to_markdown()` renders hand-built `Block` trees to the
/// expected Markdown strings.
987#[test]
988fn test_ast_to_markdown() {
989 use indoc::indoc;
990 // use pretty_assertions::assert_eq;
991
 // A single text paragraph renders as the bare text.
992 assert_eq!(
993 ast_to_markdown(&[Block::paragraph(vec![Inline::Text(
994 "hello".into()
995 )])]),
996 "hello"
997 );
998
 // A list item holding two paragraphs.
 // NOTE(review): the expected string below is whitespace-sensitive
 // (indentation of the second paragraph and of the separating line) —
 // confirm the literal's spacing is intact before editing it.
999 assert_eq!(
1000 ast_to_markdown(&[Block::List(vec![ListItem(vec![
1001 Block::paragraph(vec![Inline::Text("hello".into())]),
1002 Block::paragraph(vec![Inline::Text("world".into())])
1003 ])])]),
1004 indoc!(
1005 "
1006 * hello
1007
1008 world"
1009 ),
1010 )
1011}
1012
1013/// Tests that some of the larger Markdown documents in this repository
1014/// all round-trip when processed:
///
/// Each document is embedded at compile time with `include_str!` and checked
/// with `assert_roundtrip`.
1015#[test]
1016fn test_md_documents_roundtrip() {
1017 let kitchen_sink_md =
1018 include_str!("../../md2nb/docs/examples/kitchen-sink.md");
1019
1020 // FIXME:
1021 // Fix the bugs requiring these hacky removals from kitchen-sink.md
1022 // that are needed to make the tests below pass.
 // The removed pieces are an indented code block, a shortcut reference
 // link, a full reference link, and the two link reference definitions.
 // NOTE(review): these patterns must match the document byte-for-byte,
 // including whitespace; a pattern that does not match is silently a no-op.
1023 let kitchen_sink_md = kitchen_sink_md
1024 .replace("\n \"This is an indented code block.\"\n", "")
1025 .replace("\nThis is a [shortcut] reference link.\n", "")
1026 .replace("\nThis is a [full reference][full reference] link.\n", "")
1027 .replace("\n[full reference]: https://example.org\n", "")
1028 .replace("[shortcut]: https://example.org\n", "");
1029
1030 assert_roundtrip(&kitchen_sink_md);
1031
1032 //==================================
1033 // README.md
1034 //==================================
1035
 // The README is checked unmodified.
1036 let readme = include_str!("../../../README.md");
1037
1038 assert_roundtrip(readme);
1039}
1040
/// Asserts that `markdown` survives a full trip through the pipeline
/// `String => Events => Blocks => Events => String` without change.
///
/// Two properties are checked:
///
/// * converting the parsed events to an AST and back yields the same events;
/// * rendering that AST yields a string equal to the input.
#[cfg(test)]
fn assert_roundtrip(markdown: &str) {
    use pretty_assertions::assert_eq;

    // Step A: Markdown string => Events.
    let parsed: Vec<Event> = markdown_to_events(markdown).collect();

    // Step B: Events => AST blocks. The events are cloned because `parsed`
    // is still needed for the comparison below.
    let blocks: Vec<Block> = events_to_ast(parsed.iter().cloned());

    // Step C: AST blocks => Events again.
    let regenerated: Vec<Event> = ast_to_events(&blocks);

    // A => B => C must be equivalent to A alone, i.e. converting an event
    // stream to and from an AST is lossless.
    assert_eq!(regenerated, parsed);

    // A => B => C => D must reproduce the original Markdown string.
    assert_eq!(ast_to_markdown(&blocks), markdown);
}
1073}