markdown_ast/
lib.rs

1//! Parse a Markdown input string into a sequence of Markdown abstract syntax
2//! tree [`Block`]s.
3//!
4//! This crate is intentionally designed to interoperate well with the
//! [`pulldown-cmark`](https://crates.io/crates/pulldown-cmark) crate and the
6//! ecosystem around it. See [Motivation and relation to pulldown-cmark](#motivation-and-relation-to-pulldown-cmark)
7//! for more information.
8//!
9//! The AST types are designed to align with the structure defined
10//! by the [CommonMark Specification](https://spec.commonmark.org/).
11//!
12//! # Quick Examples
13//!
14//! Parse simple Markdown into an AST:
15//!
16//! ```
17//! use markdown_ast::{markdown_to_ast, Block, Inline, Inlines};
18//! # use pretty_assertions::assert_eq;
19//!
20//! let ast = markdown_to_ast("
21//! Hello! This is a paragraph **with bold text**.
22//! ");
23//!
24//! assert_eq!(ast, vec![
25//!     Block::Paragraph(Inlines(vec![
26//!         Inline::Text("Hello! This is a paragraph ".to_owned()),
27//!         Inline::Strong(Inlines(vec![
28//!             Inline::Text("with bold text".to_owned()),
29//!         ])),
30//!         Inline::Text(".".to_owned())
31//!     ]))
32//! ]);
33//! ```
34//!
35//!
36//!
37//! # API Overview
38//!
39//! | Function                           | Input      | Output       |
40//! |------------------------------------|------------|--------------|
41//! | [`markdown_to_ast()`]              | `&str`     | `Vec<Block>` |
42//! | [`ast_to_markdown()`]              | `&[Block]` | `String`     |
43//! | [`ast_to_events()`]                | `&[Block]` | `Vec<Event>` |
44//! | [`events_to_ast()`]                | `&[Event]` | `Vec<Block>` |
45//! | [`events_to_markdown()`]           | `&[Event]` | `String`     |
46//! | [`markdown_to_events()`]           | `&str`     | `Vec<Event>` |
47//! | [`canonicalize()`]                 | `&str`     | `String`     |
48//!
49//! ##### Terminology
50//!
51//! This crate is able to process and manipulate Markdown in three different
52//! representations:
53//!
54//! | Term     | Type                 | Description                         |
55//! |----------|----------------------|-------------------------------------|
56//! | Markdown | `String`             | Raw Markdown source / output string |
57//! | Events   | `&[Event]`           | Markdown parsed by [`pulldown-cmark`](https://crates.io/crates/pulldown-cmark) into a flat sequence of parser [`Event`]s |
58//! | AST      | `Block` / `&[Block]` | Markdown parsed by `markdown-ast` into a hierarchical structure of [`Block`]s |
59//!
60//! ##### Processing Steps
61//!
62//! ```text
63//!     String => Events => Blocks => Events => String
64//!     |_____ A ______|    |______ C _____|
65//!               |______ B _____|    |______ D _____|
66//!     |__________ E ___________|
67//!                         |___________ F __________|
68//!     |____________________ G _____________________|
69//! ```
70//!
71//! - **A** — [`markdown_to_events()`]
72//! - **B** — [`events_to_ast()`]
73//! - **C** — [`ast_to_events()`]
74//! - **D** — [`events_to_markdown()`]
75//! - **E** — [`markdown_to_ast()`]
76//! - **F** — [`ast_to_markdown()`]
77//! - **G** — [`canonicalize()`]
78//!
//! Note: **A** wraps [`pulldown_cmark::Parser`], and **D** wraps
//! [`pulldown_cmark_to_cmark::cmark_with_options()`].
81//!
82//!
83//!
84//! # Detailed Examples
85//!
86//! #### Parse varied Markdown to an AST representation:
87//!
88//! ```
89//! use markdown_ast::{
90//!     markdown_to_ast, Block, HeadingLevel, Inline, Inlines, ListItem
91//! };
92//! # use pretty_assertions::assert_eq;
93//!
94//! let ast = markdown_to_ast("
95//! ## An Example Document
96//!
97//! This is a paragraph that
98//! is split across *multiple* lines.
99//!
100//! * This is a list item
101//! ");
102//!
103//! assert_eq!(ast, vec![
104//!     Block::Heading(
105//!         HeadingLevel::H1,
106//!         Inlines(vec![
107//!              Inline::Text("An Example Document".to_owned())
108//!         ])
109//!     ),
110//!     Block::Paragraph(Inlines(vec![
111//!         Inline::Text("This is a paragraph that".to_owned()),
112//!         Inline::SoftBreak,
113//!         Inline::Text("is split across ".to_owned()),
114//!         Inline::Emphasis(Inlines(vec![
115//!             Inline::Text("multiple".to_owned()),
116//!         ])),
117//!         Inline::Text(" lines.".to_owned()),
118//!     ])),
119//!     Block::List(vec![
120//!         ListItem(vec![
121//!             Block::Paragraph(Inlines(vec![
122//!                 Inline::Text("This is a list item".to_owned())
123//!             ]))
124//!         ])
125//!     ])
126//! ]);
127//! ```
128//!
129//! #### Synthesize Markdown using programmatic construction of the document:
130//!
131//! *Note:* This is a more user friendly alternative to a "string builder"
132//! approach where the raw Markdown string is constructed piece by piece,
133//! which suffers from extra bookkeeping that must be done to manage things like
134//! indent level and soft vs hard breaks.
135//!
136//! ```
137//! use markdown_ast::{
138//!     ast_to_markdown, Block, Inline, Inlines, ListItem,
139//!     HeadingLevel,
140//! };
141//! # use pretty_assertions::assert_eq;
142//!
143//! let tech_companies = vec![
144//!     ("Apple", 1976, 164_000),
145//!     ("Microsoft", 1975, 221_000),
146//!     ("Nvidia", 1993, 29_600),
147//! ];
148//!
149//! let ast = vec![
150//!     Block::Heading(HeadingLevel::H1, Inlines::plain_text("Tech Companies")),
151//!     Block::plain_text_paragraph("The following are major tech companies:"),
152//!     Block::List(Vec::from_iter(
153//!         tech_companies
154//!             .into_iter()
155//!             .map(|(company_name, founded, employee_count)| {
156//!                 ListItem(vec![
157//!                     Block::paragraph(vec![Inline::plain_text(company_name)]),
158//!                     Block::List(vec![
159//!                         ListItem::plain_text(format!("Founded: {founded}")),
160//!                         ListItem::plain_text(format!("Employee count: {employee_count}"))
161//!                     ])
162//!                 ])
163//!             })
164//!     ))
165//! ];
166//!
167//! let markdown: String = ast_to_markdown(&ast);
168//!
169//! assert_eq!(markdown, "\
170//! ## Tech Companies
171//!
172//! The following are major tech companies:
173//!
174//! * Apple
175//!  
176//!   * Founded: 1976
177//!  
178//!   * Employee count: 164000
179//!
180//! * Microsoft
181//!  
182//!   * Founded: 1975
183//!  
184//!   * Employee count: 221000
185//!
186//! * Nvidia
187//!  
188//!   * Founded: 1993
189//!  
190//!   * Employee count: 29600\
191//! ");
192//!
193//! ```
194//!
195//! # Known Issues
196//!
197//! Currently `markdown-ast` does not escape Markdown content appearing in
198//! leaf inline text:
199//!
200//! ```
201//! use markdown_ast::{ast_to_markdown, Block};
202//!
203//! let ast = vec![
204//!     Block::plain_text_paragraph("In the equation a*b*c ...")
205//! ];
206//!
207//! let markdown = ast_to_markdown(&ast);
208//!
209//! assert_eq!(markdown, "In the equation a*b*c ...");
210//! ```
211//!
212//! which will render as:
213//!
214//! > In the equation a*b*c ...
215//!
216//! with the asterisks interpreted as emphasis formatting markers, contrary to
217//! the intention of the author.
218//!
219//! Fixing this robustly will require either:
220//!
221//! * Adding automatic escaping of Markdown characters in [`Inline::Text`]
222//!   during rendering (not ideal)
223//!
224//! * Adding pre-construction validation checks for [`Inline::Text`] that
225//!   prevent constructing an `Inline` with Markdown formatting characters that
226//!   have not been escaped correctly by the user.
227//!
228//! In either case, fixing this bug will be considered a **semver exempt**
229//! change in behavior to `markdown-ast`.
230//!
231//! # Motivation and relation to `pulldown-cmark`
232//!
233//! [`pulldown-cmark`](https://crates.io/crates/pulldown-cmark) is a popular
234//! Markdown parser crate. It provides a streaming event (pull parsing) based
235//! representation of a Markdown document. That representation is useful for
236//! efficient transformation of a Markdown document into another format, often
237//! HTML.
238//!
239//! However, a streaming parser representation is less amenable to programmatic
240//! construction or human-understandable transformations of Markdown documents.
241//!
//! `markdown-ast` provides an abstract syntax tree (AST) representation of
243//! Markdown that is easy to construct and work with.
244//!
245//! Additionally, `pulldown-cmark` is widely used in the Rust crate ecosystem,
246//! for example for [`mdbook`](https://crates.io/crates/mdbook) extensions.
247//! Interoperability with `pulldown-cmark` is an intentional design choice for
248//! the usability of `markdown-ast`; one could imagine `markdown-ast` instead
249//! abstracting over the underlying parser implementation, but my view is that
250//! would limit the utility of `markdown-ast`.
251//!
252
253mod unflatten;
254
255mod from_events;
256mod to_events;
257
/// Ensure that doc tests in the README.md file get run.
///
/// This module declares no items of its own. Its inner `#![doc = ...]`
/// attribute includes the text of `../README.md` as the module's
/// documentation, which is how the README's examples end up being run as doc
/// tests.
///
/// See: <https://connorgray.com/reference/creating-a-new-rust-crate#test-readmemd-examples>
mod test_readme {
    #![doc = include_str!("../README.md")]
}
264
265use pulldown_cmark::{self as md, CowStr, Event};
266
267pub use pulldown_cmark::HeadingLevel;
268
269//======================================
270// AST Representation
271//======================================
272
/// A piece of structural Markdown content.
/// (CommonMark: [blocks](https://spec.commonmark.org/0.30/#blocks),
/// [container blocks](https://spec.commonmark.org/0.30/#container-blocks))
///
/// A parsed document is a sequence of `Block`s. Container variants
/// ([`Block::List`], [`Block::BlockQuote`]) hold further `Block`s, while leaf
/// variants hold [`Inlines`] or raw text.
#[derive(Debug, Clone, PartialEq)]
pub enum Block {
    /// CommonMark: [paragraphs](https://spec.commonmark.org/0.30/#paragraphs)
    Paragraph(Inlines),
    /// CommonMark: [lists](https://spec.commonmark.org/0.30/#lists)
    List(Vec<ListItem>),
    /// CommonMark: [ATX heading](https://spec.commonmark.org/0.30/#atx-heading)
    Heading(HeadingLevel, Inlines),
    /// An indented or fenced code block.
    ///
    /// CommonMark: [indented code blocks](https://spec.commonmark.org/0.30/#indented-code-blocks),
    /// [fenced code blocks](https://spec.commonmark.org/0.30/#fenced-code-blocks)
    CodeBlock {
        /// Indicates whether this is a fenced or indented code block.
        ///
        /// If this `CodeBlock` is a fenced code block, this contains its info
        /// string.
        ///
        /// CommonMark: [info string](https://spec.commonmark.org/0.30/#info-string)
        kind: CodeBlockKind,
        /// The text of the code block.
        code: String,
    },
    /// CommonMark: [block quotes](https://spec.commonmark.org/0.30/#block-quotes)
    BlockQuote {
        // TODO: Document
        /// The `pulldown-cmark` block quote kind attached to this quote, if
        /// any. NOTE(review): presumably `None` for an ordinary `>` quote —
        /// confirm against the event conversion code.
        kind: Option<md::BlockQuoteKind>,
        /// The blocks nested inside the quote.
        blocks: Vec<Block>,
    },
    /// A table.
    ///
    /// Tables are not part of CommonMark; [`markdown_to_events()`] enables
    /// them explicitly through the `ENABLE_TABLES` parser option.
    Table {
        /// Column alignments. NOTE(review): presumably one entry per column —
        /// confirm against the event conversion code.
        alignments: Vec<md::Alignment>,
        /// The cells of the header row.
        headers: Vec<Inlines>,
        /// The body rows, each a sequence of cells.
        rows: Vec<Vec<Inlines>>,
    },
    /// CommonMark: [thematic breaks](https://spec.commonmark.org/0.30/#thematic-breaks)
    Rule,
}
312
/// A sequence of [`Inline`]s.
/// (CommonMark: [inlines](https://spec.commonmark.org/0.30/#inlines))
///
/// This is a newtype around `Vec<Inline>`; the inner vector is public and can
/// be constructed or accessed directly.
#[derive(Debug, Clone, PartialEq)]
pub struct Inlines(pub Vec<Inline>);
317
/// An item in a list. (CommonMark: [list items](https://spec.commonmark.org/0.30/#list-items))
///
/// This is a newtype around the `Vec<Block>` making up the item's content, so
/// an item may hold several blocks (for example a paragraph followed by a
/// nested [`Block::List`]).
#[derive(Debug, Clone, PartialEq)]
pub struct ListItem(pub Vec<Block>);
321
/// An inline piece of atomic Markdown content.
/// (CommonMark: [inlines](https://spec.commonmark.org/0.30/#inlines))
#[derive(Debug, Clone, PartialEq)]
pub enum Inline {
    /// A run of plain text.
    ///
    /// Note that this text is currently not escaped when it is rendered back
    /// to Markdown; see "Known Issues" in the crate-level documentation.
    Text(String),
    /// CommonMark: [emphasis](https://spec.commonmark.org/0.30/#emphasis-and-strong-emphasis)
    Emphasis(Inlines),
    /// CommonMark: [strong emphasis](https://spec.commonmark.org/0.30/#emphasis-and-strong-emphasis)
    Strong(Inlines),
    /// Strikethrough styled text. (Non-standard.)
    Strikethrough(Inlines),
    /// CommonMark: [code spans](https://spec.commonmark.org/0.30/#code-spans)
    Code(String),
    /// CommonMark: [links](https://spec.commonmark.org/0.30/#links)
    // TODO:
    //  Document every type of Inline::Link value and what its equivalent source
    //  is.
    Link {
        /// The `pulldown-cmark` link type, e.g. `LinkType::Inline` for a link
        /// written as `[text](url)`.
        link_type: md::LinkType,
        /// CommonMark: [link destination](https://spec.commonmark.org/0.30/#link-destination)
        dest_url: String,
        /// CommonMark: [link title](https://spec.commonmark.org/0.30/#link-title)
        title: String,
        /// CommonMark: [link label](https://spec.commonmark.org/0.30/#link-label)
        id: String,
        /// CommonMark: [link text](https://spec.commonmark.org/0.30/#link-text)
        content_text: Inlines,
    },
    /// CommonMark: [soft line breaks](https://spec.commonmark.org/0.30/#soft-line-breaks)
    SoftBreak,
    /// CommonMark: [hard line breaks](https://spec.commonmark.org/0.30/#hard-line-breaks)
    HardBreak,
}
355
/// Whether a [`Block::CodeBlock`] is fenced or indented.
///
/// This is an owned counterpart of `pulldown_cmark::CodeBlockKind`, storing
/// the info string as a `String`.
#[derive(Debug, Clone, PartialEq)]
pub enum CodeBlockKind {
    /// A fenced code block, carrying its info string.
    ///
    /// CommonMark: [info string](https://spec.commonmark.org/0.30/#info-string)
    Fenced(String),
    /// An indented code block, which has no info string.
    Indented,
}
361
362//======================================
363// Public API Functions
364//======================================
365
366/// Parse Markdown input string into AST [`Block`]s.
367pub fn markdown_to_ast(input: &str) -> Vec<Block> {
368    /* For Markdown parsing debugging.
369    {
370        let mut options = md::Options::empty();
371        options.insert(md::Options::ENABLE_STRIKETHROUGH);
372        let parser = md::Parser::new_ext(input, options);
373
374        let events: Vec<_> = parser.into_iter().collect();
375
376        println!("==== All events =====\n");
377        for event in &events {
378            println!("{event:?}");
379        }
380        println!("\n=====================\n");
381
382        println!("==== Unflattened events =====\n");
383        for event in unflatten::parse_markdown_to_unflattened_events(input) {
384            println!("{event:#?}")
385        }
386        println!("=============================\n");
387    }
388    */
389
390    let events = markdown_to_events(input);
391
392    return events_to_ast(events);
393}
394
395/// Convert AST [`Block`]s into a Markdown string.
396pub fn ast_to_markdown(blocks: &[Block]) -> String {
397    let events = ast_to_events(blocks);
398
399    return events_to_markdown(events);
400}
401
402/// Convert [`Event`]s into a Markdown string.
403///
404/// This is a thin wrapper around
405/// [`pulldown_cmark_to_cmark::cmark_with_options`], provided in this crate for
406/// consistency and ease of use.
407pub fn events_to_markdown<'e, I: IntoIterator<Item = Event<'e>>>(
408    events: I,
409) -> String {
410    let mut string = String::new();
411
412    let options = default_to_markdown_options();
413
414    let _: pulldown_cmark_to_cmark::State =
415        pulldown_cmark_to_cmark::cmark_with_options(
416            events.into_iter(),
417            &mut string,
418            options,
419        )
420        .expect("error converting Event sequent to Markdown string");
421
422    string
423}
424
425/// Convert AST [`Block`]s into an [`Event`] sequence.
426pub fn ast_to_events(blocks: &[Block]) -> Vec<Event> {
427    let mut events: Vec<Event> = Vec::new();
428
429    for block in blocks {
430        let events = &mut events;
431
432        crate::to_events::block_to_events(&block, events);
433    }
434
435    events
436}
437
438/// Parse [`Event`]s into AST [`Block`]s.
439pub fn events_to_ast<'i, I: IntoIterator<Item = Event<'i>>>(
440    events: I,
441) -> Vec<Block> {
442    let events =
443        unflatten::parse_markdown_to_unflattened_events(events.into_iter());
444
445    crate::from_events::ast_events_to_ast(events)
446}
447
448/// Parse Markdown input string into [`Event`]s.
449///
450/// This is a thin wrapper around [`pulldown_cmark::Parser`], provided in this
451/// crate for consistency and ease of use.
452pub fn markdown_to_events<'i>(
453    input: &'i str,
454) -> impl Iterator<Item = Event<'i>> {
455    // Set up options and parser. Strikethroughs are not part of the CommonMark standard
456    // and we therefore must enable it explicitly.
457    let mut options = md::Options::empty();
458    options.insert(md::Options::ENABLE_STRIKETHROUGH);
459    options.insert(md::Options::ENABLE_TABLES);
460    md::Parser::new_ext(input, options)
461}
462
463/// Canonicalize (or format) a Markdown input by parsing and then converting
464/// back to a string.
465///
466/// **⚠️ Warning ⚠️:** This function is **semver exempt**. The precise
467/// canonicalization behavior may change in MINOR or PATCH versions of
468/// markdown-ast. (Stabilizing the behavior of this function will require
469/// additional options to configure the behavior of
470/// [pulldown-cmark-to-cmark](https://crates.io/crates/pulldown-cmark-to-cmark).)
471///
472/// # Examples
473///
474/// List items using `-` (minus) are canonicalized to the `*` (asterisk) list
475/// marker type:
476///
477/// ```
478/// use markdown_ast::canonicalize;
479/// assert_eq!(
480/// canonicalize("\
481/// - Foo
482/// - Bar
483/// "),
484/// "\
485/// * Foo
486///
487/// * Bar"
488/// )
489/// ```
490///
491/// Hard breaks ending in backslash are canonicalized to the "two spaces at the
492/// end of the line" form:
493///
494/// ```
495/// use markdown_ast::canonicalize;
496/// assert_eq!(
497/// canonicalize(r#"
498/// This ends in a hard break.\
499/// This is a new line."#),
500/// // Note: The two spaces at the end of the first line below may not be
501/// //       visible, but they're there.
502/// "\
503/// This ends in a hard break.  
504/// This is a new line."
505/// )
506/// ```
507pub fn canonicalize(input: &str) -> String {
508    let ast = markdown_to_ast(input);
509
510    return ast_to_markdown(&ast);
511}
512
513fn default_to_markdown_options() -> pulldown_cmark_to_cmark::Options<'static> {
514    pulldown_cmark_to_cmark::Options {
515        // newlines_after_paragraph: 2,
516        // newlines_after_headline: 0,
517        // newlines_after_codeblock: 0,
518        // newlines_after_list: 1,
519        // newlines_after_rest: 0,
520        code_block_token_count: 3,
521        ..pulldown_cmark_to_cmark::Options::default()
522    }
523}
524
525//======================================
526// Impls
527//======================================
528
529impl Inline {
530    /// Construct a inline containing a piece of plain text.
531    pub fn plain_text<S: Into<String>>(s: S) -> Self {
532        Inline::Text(s.into())
533    }
534
535    pub fn emphasis(inline: Inline) -> Self {
536        Inline::Emphasis(Inlines(vec![inline]))
537    }
538
539    pub fn strong(inline: Inline) -> Self {
540        Inline::Strong(Inlines(vec![inline]))
541    }
542
543    pub fn strikethrough(inline: Inline) -> Self {
544        Inline::Strikethrough(Inlines(vec![inline]))
545    }
546
547    pub fn code<S: Into<String>>(s: S) -> Self {
548        Inline::Code(s.into())
549    }
550}
551
552impl Inlines {
553    /// Construct an inlines sequence containing a single inline piece of plain
554    /// text.
555    pub fn plain_text<S: Into<String>>(inline: S) -> Self {
556        return Inlines(vec![Inline::Text(inline.into())]);
557    }
558}
559
560impl Block {
561    /// Construct a paragraph block containing a single inline piece of plain
562    /// text.
563    pub fn plain_text_paragraph<S: Into<String>>(inline: S) -> Self {
564        return Block::Paragraph(Inlines(vec![Inline::Text(inline.into())]));
565    }
566
567    pub fn paragraph(text: Vec<Inline>) -> Block {
568        Block::Paragraph(Inlines(text))
569    }
570}
571
572impl ListItem {
573    /// Construct a list item containing a single inline piece of plain text.
574    pub fn plain_text<S: Into<String>>(inline: S) -> Self {
575        return ListItem(vec![Block::Paragraph(Inlines(vec![Inline::Text(
576            inline.into(),
577        )]))]);
578    }
579}
580
581impl CodeBlockKind {
582    pub fn info_string(&self) -> Option<&str> {
583        match self {
584            CodeBlockKind::Fenced(info_string) => Some(info_string.as_str()),
585            CodeBlockKind::Indented => None,
586        }
587    }
588
589    pub(crate) fn from_pulldown_cmark(kind: md::CodeBlockKind) -> Self {
590        match kind {
591            md::CodeBlockKind::Indented => CodeBlockKind::Indented,
592            md::CodeBlockKind::Fenced(info_string) => {
593                CodeBlockKind::Fenced(info_string.to_string())
594            },
595        }
596    }
597
598    pub(crate) fn to_pulldown_cmark<'s>(&'s self) -> md::CodeBlockKind<'s> {
599        match self {
600            CodeBlockKind::Fenced(info) => {
601                md::CodeBlockKind::Fenced(CowStr::from(info.as_str()))
602            },
603            CodeBlockKind::Indented => md::CodeBlockKind::Indented,
604        }
605    }
606}
607
608impl IntoIterator for Inlines {
609    type Item = Inline;
610    type IntoIter = std::vec::IntoIter<Inline>;
611
612    fn into_iter(self) -> Self::IntoIter {
613        let Inlines(vec) = self;
614        vec.into_iter()
615    }
616}
617
618//======================================
619// Tests: Markdown to AST parsing
620//======================================
621
/// Checks that [`markdown_to_ast()`] produces the expected AST for styled
/// text, simple lists, and progressively more deeply nested list structures.
#[test]
fn test_markdown_to_ast() {
    use indoc::indoc;
    use pretty_assertions::assert_eq;

    // Plain text becomes a single paragraph holding a single text inline.
    assert_eq!(
        markdown_to_ast("hello"),
        vec![Block::paragraph(vec![Inline::Text("hello".into())])]
    );

    //--------------
    // Styled text
    //--------------

    assert_eq!(
        markdown_to_ast("*hello*"),
        vec![Block::paragraph(vec![Inline::emphasis(Inline::Text(
            "hello".into()
        ))])]
    );

    assert_eq!(
        markdown_to_ast("**hello**"),
        vec![Block::paragraph(vec![Inline::strong(Inline::Text(
            "hello".into()
        ))])]
    );

    assert_eq!(
        markdown_to_ast("~~hello~~"),
        vec![Block::paragraph(vec![Inline::strikethrough(Inline::Text(
            "hello".into()
        ))])]
    );

    assert_eq!(
        markdown_to_ast("**`strong code`**"),
        vec![Block::paragraph(vec![Inline::strong(Inline::Code(
            "strong code".into()
        ))])]
    );

    assert_eq!(
        markdown_to_ast("~~`foo`~~"),
        vec![Block::paragraph(vec![Inline::strikethrough(Inline::Code(
            "foo".into()
        ))])]
    );

    // A styled inline link: the link is nested inside the strong inline.
    assert_eq!(
        markdown_to_ast("**[example](example.com)**"),
        vec![Block::paragraph(vec![Inline::strong(Inline::Link {
            link_type: md::LinkType::Inline,
            dest_url: "example.com".into(),
            title: String::new(),
            id: String::new(),
            content_text: Inlines(vec![Inline::Text("example".into())]),
        })])]
    );

    // Test composition of emphasis, strong, strikethrough and code
    assert_eq!(
        markdown_to_ast("_~~**`foo`**~~_"),
        vec![Block::paragraph(vec![Inline::emphasis(
            Inline::strikethrough(Inline::strong(Inline::Code("foo".into())))
        )])]
    );

    //--------------
    // Lists
    //--------------

    assert_eq!(
        markdown_to_ast("* hello"),
        vec![Block::List(vec![ListItem(vec![Block::paragraph(vec![
            Inline::Text("hello".into())
        ])])])]
    );

    // List items with styled text

    assert_eq!(
        markdown_to_ast("* *hello*"),
        vec![Block::List(vec![ListItem(vec![Block::paragraph(vec![
            Inline::emphasis(Inline::Text("hello".into()))
        ])])])]
    );

    assert_eq!(
        markdown_to_ast("* **hello**"),
        vec![Block::List(vec![ListItem(vec![Block::paragraph(vec![
            Inline::strong(Inline::Text("hello".into()))
        ])])])]
    );

    assert_eq!(
        markdown_to_ast("* ~~hello~~"),
        vec![Block::List(vec![ListItem(vec![Block::paragraph(vec![
            Inline::strikethrough(Inline::Text("hello".into()),)
        ])])])]
    );

    //----------------------------------

    // Three levels of nested lists. Note that the separator lines inside this
    // literal consist of indentation whitespace only.
    let input = "\
* And **bold** text.
  
  * With nested list items.
    
    * `md2nb` supports nested lists up to three levels deep.
";

    let ast = vec![Block::List(vec![ListItem(vec![
        Block::paragraph(vec![
            Inline::plain_text("And "),
            Inline::strong(Inline::plain_text("bold")),
            Inline::plain_text(" text."),
        ]),
        Block::List(vec![ListItem(vec![
            Block::paragraph(vec![Inline::plain_text(
                "With nested list items.",
            )]),
            Block::List(vec![ListItem(vec![Block::paragraph(vec![
                Inline::code("md2nb"),
                Inline::plain_text(
                    " supports nested lists up to three levels deep.",
                ),
            ])])]),
        ])]),
    ])])];

    assert_eq!(markdown_to_ast(input), ast);

    // Sanity check conversion to event stream.
    assert_eq!(
        markdown_to_events(input).collect::<Vec<_>>(),
        ast_to_events(&ast)
    );

    //----------------------------------
    // Test structures
    //----------------------------------

    // A list item containing two paragraphs.
    assert_eq!(
        markdown_to_ast(indoc!(
            "
            * hello

              world
            "
        )),
        vec![Block::List(vec![ListItem(vec![
            Block::paragraph(vec![Inline::Text("hello".into())]),
            Block::paragraph(vec![Inline::Text("world".into())])
        ])])]
    );

    // A nested item holding a paragraph, a second paragraph and a further
    // nested list.
    #[rustfmt::skip]
    assert_eq!(
        markdown_to_ast(indoc!(
            "
            # Example

            * A
              - A.A

                hello world

                * *A.A.A*
            "
        )),
        vec![
            Block::Heading(
                HeadingLevel::H1,
                Inlines(vec![Inline::Text("Example".into())])
            ),
            Block::List(vec![
                ListItem(vec![
                    Block::paragraph(vec![Inline::Text("A".into())]),
                    Block::List(vec![
                        ListItem(vec![
                            Block::paragraph(vec![Inline::Text("A.A".into())]),
                            Block::paragraph(vec![Inline::Text("hello world".into())]),
                            Block::List(vec![
                                ListItem(vec![
                                    Block::paragraph(vec![
                                        Inline::emphasis(
                                            Inline::Text(
                                            "A.A.A".into()),
                                        )
                                    ])
                                ])
                            ])
                        ])
                    ])
                ])
            ])
        ]
    );

    // Sibling items that follow a more deeply nested list.
    #[rustfmt::skip]
    assert_eq!(
        markdown_to_ast(indoc!(
            "
            * A
              - A.A
                * A.A.A
              - A.B
              - A.C
            "
        )),
        vec![
            Block::List(vec![
                ListItem(vec![
                    Block::paragraph(vec![Inline::Text("A".into())]),
                    Block::List(vec![
                        ListItem(vec![
                            Block::paragraph(vec![Inline::Text("A.A".into())]),
                            Block::List(vec![ListItem(vec![
                                Block::paragraph(vec![Inline::Text("A.A.A".into())]),
                            ])])
                        ]),
                        ListItem(vec![
                            Block::paragraph(vec![Inline::Text("A.B".into())]),
                        ]),
                        ListItem(vec![
                            Block::paragraph(vec![Inline::Text("A.C".into())]),
                        ])
                    ])
                ])
            ])
        ]
    );

    // Switching the list marker from `-` to `*` at the same indentation is
    // expected to produce a second, separate list (see the two `Block::List`
    // values inside item "A" below).
    #[rustfmt::skip]
    assert_eq!(
        markdown_to_ast(indoc!(
            "
            # Example

            * A
              - A.A
              - A.B
              * A.C
            "
        )),
        vec![
            Block::Heading(
                HeadingLevel::H1,
                Inlines(vec![Inline::Text("Example".into())])
            ),
            Block::List(vec![
                ListItem(vec![
                    Block::paragraph(vec![Inline::Text("A".into())]),
                    Block::List(vec![
                        ListItem(vec![
                            Block::paragraph(vec![Inline::Text("A.A".into())]),
                        ]),
                        ListItem(vec![
                            Block::paragraph(vec![Inline::Text("A.B".into())]),
                        ]),
                    ]),
                    Block::List(vec![
                        ListItem(vec![
                            Block::paragraph(vec![Inline::Text("A.C".into())])
                        ])
                    ]),
                ]),
            ])
        ]
    );

    // A nested item with a second paragraph, followed by another sibling item.
    #[rustfmt::skip]
    assert_eq!(
        markdown_to_ast(indoc!(
            "
            * A
              - A.A
              - A.B

                separate paragraph

              - A.C
            "
        )),
        vec![
            Block::List(vec![
                ListItem(vec![
                    Block::paragraph(vec![Inline::Text("A".into())]),
                    Block::List(vec![
                        ListItem(vec![
                            Block::paragraph(vec![Inline::Text("A.A".into())]),
                        ]),
                        ListItem(vec![
                            Block::paragraph(vec![Inline::Text("A.B".into())]),
                            Block::paragraph(vec![Inline::Text("separate paragraph".into())]),
                        ]),
                        ListItem(vec![
                            Block::paragraph(vec![Inline::Text("A.C".into())]),
                        ])
                    ])
                ])
            ])
        ]
    );

    // Combination of the cases above, plus a soft break inside the most deeply
    // nested item.
    #[rustfmt::skip]
    assert_eq!(
        markdown_to_ast(indoc!(
            "
            # Example

            * A
              - A.A
                * A.A.A
                  **soft break**

              - A.B

                separate paragraph

              - A.C
            "
        )),
        vec![
            Block::Heading(
                HeadingLevel::H1,
                Inlines(vec![Inline::Text("Example".into())])
            ),
            Block::List(vec![
                ListItem(vec![
                    Block::paragraph(vec![Inline::Text("A".into())]),
                    Block::List(vec![
                        ListItem(vec![
                            Block::paragraph(vec![Inline::Text("A.A".into())]),
                            Block::List(vec![
                                ListItem(vec![
                                    Block::paragraph(vec![
                                        Inline::Text("A.A.A".into()),
                                        Inline::SoftBreak,
                                        Inline::strong(
                                            Inline::Text("soft break".into()),
                                        )
                                    ]),
                                ])
                            ]),
                        ]),
                        ListItem(vec![
                            Block::paragraph(vec![Inline::Text("A.B".into())]),
                            Block::paragraph(vec![Inline::Text("separate paragraph".into())]),
                        ]),
                        ListItem(vec![
                            Block::paragraph(vec![Inline::Text("A.C".into())]),
                        ]),
                    ])
                ])
            ])
        ]
    );
}
982
983//======================================
984// Tests: AST to Markdown string
985//======================================
986
/// Checks that [`ast_to_markdown()`] renders small hand-built ASTs to the
/// expected Markdown text.
#[test]
fn test_ast_to_markdown() {
    // A lone paragraph renders as its bare text.
    assert_eq!(
        ast_to_markdown(&[Block::paragraph(vec![Inline::Text(
            "hello".into()
        )])]),
        "hello"
    );

    // A single list item holding two paragraphs: the second paragraph is
    // indented under the bullet, and the separator line between the two
    // paragraphs carries that same two-space indent.
    //
    // The expected text is written with explicit `\n` escapes instead of a
    // multi-line literal so the whitespace-only separator line is visible in
    // the source and cannot be lost to trailing-whitespace trimming.
    assert_eq!(
        ast_to_markdown(&[Block::List(vec![ListItem(vec![
            Block::paragraph(vec![Inline::Text("hello".into())]),
            Block::paragraph(vec![Inline::Text("world".into())])
        ])])]),
        "* hello\n  \n  world",
    );
}
1012
/// Checks that a few of the larger Markdown documents kept in this
/// repository pass [`assert_roundtrip()`].
#[test]
fn test_md_documents_roundtrip() {
    //==================================
    // kitchen-sink.md
    //==================================

    // FIXME:
    //  Fix the bugs requiring these hacky removals from kitchen-sink.md
    //  that are needed to make the tests below pass.
    //
    // The snippets are stripped one after another, in the order listed.
    let unsupported_snippets = [
        "\n    \"This is an indented code block.\"\n",
        "\nThis is a [shortcut] reference link.\n",
        "\nThis is a [full reference][full reference] link.\n",
        "\n[full reference]: https://example.org\n",
        "[shortcut]: https://example.org\n",
    ];

    let mut kitchen_sink_md =
        include_str!("../../md2nb/docs/examples/kitchen-sink.md").to_owned();
    for snippet in unsupported_snippets {
        kitchen_sink_md = kitchen_sink_md.replace(snippet, "");
    }

    assert_roundtrip(&kitchen_sink_md);

    //==================================
    // README.md
    //==================================

    assert_roundtrip(include_str!("../../../README.md"));
}
1040
/// Asserts that `markdown` passes through the conversion pipeline unchanged.
///
/// Two properties are checked:
///
/// 1. Turning the parsed events into an AST and back yields the very same
///    event sequence.
/// 2. Rendering that AST with [`ast_to_markdown()`] reproduces `markdown`
///    exactly.
///
/// # Panics
///
/// Panics (through `assert_eq!`) when either property does not hold.
#[cfg(test)]
fn assert_roundtrip(markdown: &str) {
    use pretty_assertions::assert_eq;

    // Legend for the conversion steps referred to below:
    //
    //     String => Events => Blocks => Events => String
    //     |_____ A ______|    |______ C _____|
    //               |______ B _____|    |______ D _____|
    //     |__________ E ___________|
    //                         |___________ F __________|

    // Step A: parse the input string into events.
    let source_events = markdown_to_events(markdown).collect::<Vec<Event>>();

    // Step B: build the AST. A clone is handed over because the source
    // events are still needed for the comparison further down.
    let blocks: Vec<Block> = events_to_ast(source_events.clone());

    // Step C: turn the AST back into events.
    let regenerated_events: Vec<Event> = ast_to_events(&blocks);

    // A => B => C must give the same result as A alone, i.e. going from an
    // event stream to an AST and back loses nothing.
    assert_eq!(regenerated_events, source_events);

    // Step F on the AST: the rendered Markdown must equal the original
    // input string.
    let regenerated_markdown = ast_to_markdown(&blocks);
    assert_eq!(regenerated_markdown, markdown);
}