pulldown_cmark_to_cmark/
lib.rs

1//! Convert `pulldown-cmark` `Event`s back to the string they were parsed from.
2//!
3//! This crate provides functions to serialize markdown events back into markdown text format.
4//!
5//! # Examples
6//!
7//! ```rust
8//! use pulldown_cmark::Parser;
9//! use pulldown_cmark_to_cmark::cmark;
10//!
11//! let input_markdown = "# Hello\n\nWorld!";
12//! let events = Parser::new(input_markdown);
13//! let mut output_markdown = String::new();
14//! cmark(events, &mut output_markdown).unwrap();
15//! assert_eq!(output_markdown, input_markdown);
16//! ```
17
18#![deny(rust_2018_idioms)]
19#![deny(missing_docs)]
20
21use std::{
22    borrow::{Borrow, Cow},
23    collections::HashSet,
24    fmt,
25    ops::Range,
26};
27
28use pulldown_cmark::{Alignment as TableAlignment, BlockQuoteKind, Event, LinkType, MetadataBlockKind, Tag, TagEnd};
29
30mod source_range;
31mod text_modifications;
32
33pub use source_range::{
34    cmark_resume_with_source_range, cmark_resume_with_source_range_and_options, cmark_with_source_range,
35    cmark_with_source_range_and_options,
36};
37use text_modifications::*;
38
/// Similar to [`pulldown_cmark::Alignment`], but with required
/// traits for comparison to allow testing.
///
/// Values are created from the upstream type through the `From<&pulldown_cmark::Alignment>`
/// implementation and are stored in [`State::table_alignments`] while a table is serialized.
#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Alignment {
    /// No alignment specified
    None,
    /// Left-aligned
    Left,
    /// Center-aligned
    Center,
    /// Right-aligned
    Right,
}
52
53impl<'a> From<&'a TableAlignment> for Alignment {
54    fn from(s: &'a TableAlignment) -> Self {
55        match *s {
56            TableAlignment::None => Self::None,
57            TableAlignment::Left => Self::Left,
58            TableAlignment::Center => Self::Center,
59            TableAlignment::Right => Self::Right,
60        }
61    }
62}
63
/// Which flavor of code block is currently being written out.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum CodeBlockKind {
    /// A code block marked by indentation (4 spaces or 1 tab).
    Indented,
    /// A code block enclosed by fences (runs of backticks or tildes).
    Fenced,
}
72
73/// The state of the [`cmark_resume()`] and [`cmark_resume_with_options()`] functions.
74/// This does not only allow introspection, but enables the user
75/// to halt the serialization at any time, and resume it later.
76#[derive(Clone, Default, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
77#[non_exhaustive]
78pub struct State<'a> {
79    /// The amount of newlines to insert after `Event::Start(...)`
80    pub newlines_before_start: usize,
81    /// The lists and their types for which we have seen a `Event::Start(List(...))` tag
82    pub list_stack: Vec<Option<u64>>,
83    /// The computed padding and prefix to print after each newline.
84    /// This changes with the level of `BlockQuote` and `List` events.
85    pub padding: Vec<Cow<'a, str>>,
86    /// Keeps the current table alignments, if we are currently serializing a table.
87    pub table_alignments: Vec<Alignment>,
88    /// Keeps the current table headers, if we are currently serializing a table.
89    pub table_headers: Vec<String>,
90    /// The last seen text when serializing a header
91    pub text_for_header: Option<String>,
92    /// Is set while we are handling text in a code block
93    pub code_block: Option<CodeBlockKind>,
94    /// True if the last event was text and the text does not have trailing newline. Used to inject additional newlines before code block end fence.
95    pub last_was_text_without_trailing_newline: bool,
96    /// True if the last event was a paragraph start. Used to escape spaces at start of line (prevent spurrious indented code).
97    pub last_was_paragraph_start: bool,
98    /// True if the next event is a link, image, or footnote.
99    pub next_is_link_like: bool,
100    /// Currently open links
101    pub link_stack: Vec<LinkCategory<'a>>,
102    /// Currently open images
103    pub image_stack: Vec<ImageLink<'a>>,
104    /// Keeps track of the last seen heading's id, classes, and attributes
105    pub current_heading: Option<Heading<'a>>,
106    /// True whenever between `Start(TableCell)` and `End(TableCell)`
107    pub in_table_cell: bool,
108
109    /// Keeps track of the last seen shortcut/link
110    pub current_shortcut_text: Option<String>,
111    /// A list of shortcuts seen so far for later emission
112    pub shortcuts: Vec<(String, String, String)>,
113    /// Index into the `source` bytes of the end of the range corresponding to the last event.
114    ///
115    /// It's used to see if the current event didn't capture some bytes because of a
116    /// skipped-over backslash.
117    pub last_event_end_index: usize,
118}
119
/// How an open link has to be closed once its matching end tag arrives.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum LinkCategory<'a> {
    /// A link written between angle brackets, such as the autolink `<http://example.com>`
    AngleBracketed,
    /// A full reference link carrying its own label, written as `[text][label]`
    Reference {
        /// Where the link points to
        uri: Cow<'a, str>,
        /// Title attached to the link
        title: Cow<'a, str>,
        /// Label identifying the reference definition
        id: Cow<'a, str>,
    },
    /// A collapsed reference link, written as `[text][]`
    Collapsed {
        /// Where the link points to
        uri: Cow<'a, str>,
        /// Title attached to the link
        title: Cow<'a, str>,
    },
    /// A shortcut reference link, written as `[text]`
    Shortcut {
        /// Where the link points to
        uri: Cow<'a, str>,
        /// Title attached to the link
        title: Cow<'a, str>,
    },
    /// Any remaining kind of link, for instance an inline link
    Other {
        /// Where the link points to
        uri: Cow<'a, str>,
        /// Title attached to the link
        title: Cow<'a, str>,
    },
}
156
/// How an open image has to be closed once its matching end tag arrives.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum ImageLink<'a> {
    /// A full reference image carrying its own label, written as `![alt][label]`
    Reference {
        /// Where the image is loaded from
        uri: Cow<'a, str>,
        /// Title attached to the image
        title: Cow<'a, str>,
        /// Label identifying the reference definition
        id: Cow<'a, str>,
    },
    /// A collapsed reference image, written as `![alt][]`
    Collapsed {
        /// Where the image is loaded from
        uri: Cow<'a, str>,
        /// Title attached to the image
        title: Cow<'a, str>,
    },
    /// A shortcut reference image, written as `![alt]`
    Shortcut {
        /// Where the image is loaded from
        uri: Cow<'a, str>,
        /// Title attached to the image
        title: Cow<'a, str>,
    },
    /// Any remaining kind of image, for instance an inline image
    Other {
        /// Where the image is loaded from
        uri: Cow<'a, str>,
        /// Title attached to the image
        title: Cow<'a, str>,
    },
}
191
/// The attribute set of a heading: its id, its classes, and any further attributes.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Heading<'a> {
    /// The id attribute; `None` when the heading has no id
    id: Option<Cow<'a, str>>,
    /// The CSS classes; empty when the heading has none
    classes: Vec<Cow<'a, str>>,
    /// All remaining attributes as `(attribute_name, optional_value)` pairs
    attributes: Vec<(Cow<'a, str>, Option<Cow<'a, str>>)>,
}
202
/// The amount of code-block tokens one needs to produce a valid fenced code-block.
///
/// Note that this differs from the default of [`Options::code_block_token_count`], which is `4`.
pub const DEFAULT_CODE_BLOCK_TOKEN_COUNT: usize = 3;
205
/// Settings consumed by the [`cmark_with_options()`] and [`cmark_resume_with_options()`] functions.
/// With the defaults you get decent spacing and, most importantly, a
/// faithful rendering of your markdown document, particularly when
/// it is rendered to HTML.
///
/// Starting from `Options::default()` is the recommended way to obtain an instance.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Options<'a> {
    /// How many newlines follow a headline
    pub newlines_after_headline: usize,
    /// How many newlines follow a paragraph
    pub newlines_after_paragraph: usize,
    /// How many newlines follow a code block
    pub newlines_after_codeblock: usize,
    /// How many newlines follow an HTML block
    pub newlines_after_htmlblock: usize,
    /// How many newlines follow a table
    pub newlines_after_table: usize,
    /// How many newlines follow a horizontal rule
    pub newlines_after_rule: usize,
    /// How many newlines follow a list
    pub newlines_after_list: usize,
    /// How many newlines follow a block quote
    pub newlines_after_blockquote: usize,
    /// How many newlines follow any other element
    pub newlines_after_rest: usize,
    /// How many newlines follow a TOML or YAML metadata block at the beginning of a document.
    pub newlines_after_metadata: usize,
    /// How many tokens make up the fence of a fenced code block. Use
    /// [`calculate_code_block_token_count()`] to find a value suitable for your events.
    /// The default is `4`, which leaves room for one level of nested code-blocks
    /// and is typically safe for common kinds of markdown documents.
    pub code_block_token_count: usize,
    /// The fence character of fenced code blocks (backtick or tilde)
    pub code_block_token: char,
    /// The bullet character of unordered list items
    pub list_token: char,
    /// The character following the number of an ordered list item (e.g., '.' for `1.`)
    pub ordered_list_token: char,
    /// Whether the number goes up by one with each ordered list item
    pub increment_ordered_list_bullets: bool,
    /// The delimiter character of emphasis (italic)
    pub emphasis_token: char,
    /// The delimiter string of strong emphasis (bold)
    pub strong_token: &'a str,
    /// When `true` (default), superscript and subscript are written as the HTML tags
    /// `<sup>` and `<sub>`. When `false`, the Markdown symbols `^` and `~` are used instead.
    ///
    /// If you parse with [`ENABLE_SUPERSCRIPT`](pulldown_cmark::Options::ENABLE_SUPERSCRIPT) and
    /// [`ENABLE_SUBSCRIPT`](pulldown_cmark::Options::ENABLE_SUBSCRIPT), then
    /// you might need this in order to round-trip Markdown byte-for-byte, with knowledge
    /// of whether the parsed documents use `<sub>`/`<sup>` or `^`/`~` instead.
    pub use_html_for_super_sub_script: bool,
}
260
261const DEFAULT_OPTIONS: Options<'_> = Options {
262    newlines_after_headline: 2,
263    newlines_after_paragraph: 2,
264    newlines_after_codeblock: 2,
265    newlines_after_htmlblock: 1,
266    newlines_after_table: 2,
267    newlines_after_rule: 2,
268    newlines_after_list: 2,
269    newlines_after_blockquote: 2,
270    newlines_after_rest: 1,
271    newlines_after_metadata: 1,
272    code_block_token_count: 4,
273    code_block_token: '`',
274    list_token: '*',
275    ordered_list_token: '.',
276    increment_ordered_list_bullets: false,
277    emphasis_token: '*',
278    strong_token: "**",
279    use_html_for_super_sub_script: true,
280};
281
282impl Default for Options<'_> {
283    fn default() -> Self {
284        DEFAULT_OPTIONS
285    }
286}
287
288impl Options<'_> {
289    /// Returns the set of special characters that need escaping based on the current options.
290    pub fn special_characters(&self) -> Cow<'static, str> {
291        // These always need to be escaped, even if reconfigured.
292        const BASE: &str = "#\\_*<>`|[]";
293        if DEFAULT_OPTIONS.code_block_token == self.code_block_token
294            && DEFAULT_OPTIONS.list_token == self.list_token
295            && DEFAULT_OPTIONS.emphasis_token == self.emphasis_token
296            && DEFAULT_OPTIONS.strong_token == self.strong_token
297        {
298            BASE.into()
299        } else {
300            let mut s = String::from(BASE);
301            s.push(self.code_block_token);
302            s.push(self.list_token);
303            s.push(self.emphasis_token);
304            s.push_str(self.strong_token);
305            s.into()
306        }
307    }
308}
309
/// The error type of [`cmark_resume_with_options()`] and
/// [`cmark_resume_with_source_range_and_options()`].
#[derive(Debug)]
pub enum Error {
    /// Writing to the output formatter failed
    FormatFailed(fmt::Error),
    /// The event stream contained an event that valid markdown cannot produce
    UnexpectedEvent,
}
319
320impl fmt::Display for Error {
321    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
322        match self {
323            Self::FormatFailed(e) => e.fmt(f),
324            Self::UnexpectedEvent => f.write_str("Unexpected event while reconstructing Markdown"),
325        }
326    }
327}
328
329impl std::error::Error for Error {}
330
331impl From<fmt::Error> for Error {
332    fn from(e: fmt::Error) -> Self {
333        Self::FormatFailed(e)
334    }
335}
336
337/// As [`cmark_with_options()`], but with default [`Options`].
338pub fn cmark<'a, I, E, F>(events: I, mut formatter: F) -> Result<State<'a>, Error>
339where
340    I: Iterator<Item = E>,
341    E: Borrow<Event<'a>>,
342    F: fmt::Write,
343{
344    cmark_with_options(events, &mut formatter, Default::default())
345}
346
347/// As [`cmark_resume_with_options()`], but with default [`Options`].
348pub fn cmark_resume<'a, I, E, F>(events: I, formatter: F, state: Option<State<'a>>) -> Result<State<'a>, Error>
349where
350    I: Iterator<Item = E>,
351    E: Borrow<Event<'a>>,
352    F: fmt::Write,
353{
354    cmark_resume_with_options(events, formatter, state, Options::default())
355}
356
357/// As [`cmark_resume_with_options()`], but with the [`State`] finalized.
358pub fn cmark_with_options<'a, I, E, F>(events: I, mut formatter: F, options: Options<'_>) -> Result<State<'a>, Error>
359where
360    I: Iterator<Item = E>,
361    E: Borrow<Event<'a>>,
362    F: fmt::Write,
363{
364    let state = cmark_resume_with_options(events, &mut formatter, Default::default(), options)?;
365    state.finalize(formatter)
366}
367
368/// Serialize a stream of [pulldown-cmark-Events][Event] into a string-backed buffer.
369///
370/// 1. **events**
371///    * An iterator over [`Events`][Event], for example as returned by the [`Parser`][pulldown_cmark::Parser]
372/// 1. **formatter**
373///    * A format writer, can be a `String`.
374/// 1. **state**
375///    * The optional initial state of the serialization.
376/// 1. **options**
377///    * Customize the appearance of the serialization. All otherwise magic values are contained
378///      here.
379///
380/// *Returns* the [`State`] of the serialization on success. You can use it as initial state in the
381/// next call if you are halting event serialization.
382///
383/// *Errors* if the underlying buffer fails (which is unlikely) or if the [`Event`] stream
384/// cannot ever be produced by deserializing valid Markdown. Each failure mode corresponds to one
385/// of [`Error`]'s variants.
386pub fn cmark_resume_with_options<'a, I, E, F>(
387    events: I,
388    mut formatter: F,
389    state: Option<State<'a>>,
390    options: Options<'_>,
391) -> Result<State<'a>, Error>
392where
393    I: Iterator<Item = E>,
394    E: Borrow<Event<'a>>,
395    F: fmt::Write,
396{
397    let mut state = state.unwrap_or_default();
398    let mut events = events.peekable();
399    while let Some(event) = events.next() {
400        state.next_is_link_like = matches!(
401            events.peek().map(Borrow::borrow),
402            Some(
403                Event::Start(Tag::Link { .. } | Tag::Image { .. } | Tag::FootnoteDefinition(..))
404                    | Event::FootnoteReference(..)
405            )
406        );
407        cmark_resume_one_event(event, &mut formatter, &mut state, &options)?;
408    }
409    Ok(state)
410}
411
412fn cmark_resume_one_event<'a, E, F>(
413    event: E,
414    formatter: &mut F,
415    state: &mut State<'a>,
416    options: &Options<'_>,
417) -> Result<(), Error>
418where
419    E: Borrow<Event<'a>>,
420    F: fmt::Write,
421{
422    use pulldown_cmark::{Event::*, Tag::*};
423
424    let last_was_text_without_trailing_newline = state.last_was_text_without_trailing_newline;
425    state.last_was_text_without_trailing_newline = false;
426    let last_was_paragraph_start = state.last_was_paragraph_start;
427    state.last_was_paragraph_start = false;
428
429    let res = match event.borrow() {
430        Rule => {
431            consume_newlines(formatter, state)?;
432            state.set_minimum_newlines_before_start(options.newlines_after_rule);
433            formatter.write_str("---")
434        }
435        Code(text) => {
436            if let Some(shortcut_text) = state.current_shortcut_text.as_mut() {
437                shortcut_text.push('`');
438                shortcut_text.push_str(text);
439                shortcut_text.push('`');
440            }
441            if let Some(text_for_header) = state.text_for_header.as_mut() {
442                text_for_header.push('`');
443                text_for_header.push_str(text);
444                text_for_header.push('`');
445            }
446
447            // (re)-escape `|` when it appears as part of inline code in the
448            // body of a table.
449            //
450            // NOTE: This does not do *general* escaped-character handling
451            // because the only character which *requires* this handling in this
452            // spot in earlier versions of `pulldown-cmark` is a pipe character
453            // in inline code in a table. Other escaping is handled when `Text`
454            // events are emitted.
455            let text = if state.in_table_cell {
456                Cow::Owned(text.replace('|', "\\|"))
457            } else {
458                Cow::Borrowed(text.as_ref())
459            };
460
461            // When inline code has leading and trailing ' ' characters, additional space is needed
462            // to escape it, unless all characters are space.
463            if text.chars().all(|ch| ch == ' ') {
464                write!(formatter, "`{text}`")
465            } else {
466                // More backticks are needed to delimit the inline code than the maximum number of
467                // backticks in a consecutive run.
468                let backticks = Repeated('`', max_consecutive_chars(&text, '`') + 1);
469                let space = match text.as_bytes() {
470                    &[b'`', ..] | &[.., b'`'] => " ", // Space needed to separate backtick.
471                    &[b' ', .., b' '] => " ",         // Space needed to escape inner space.
472                    _ => "",                          // No space needed.
473                };
474                write!(formatter, "{backticks}{space}{text}{space}{backticks}")
475            }
476        }
477        Start(tag) => {
478            if let List(list_type) = tag {
479                state.list_stack.push(*list_type);
480                if state.list_stack.len() > 1 {
481                    state.set_minimum_newlines_before_start(options.newlines_after_rest);
482                }
483            }
484            let consumed_newlines = state.newlines_before_start != 0;
485            consume_newlines(formatter, state)?;
486            match tag {
487                Item => {
488                    // lazy lists act like paragraphs with no event
489                    state.last_was_paragraph_start = true;
490                    match state.list_stack.last_mut() {
491                        Some(inner) => {
492                            state.padding.push(list_item_padding_of(*inner));
493                            match inner {
494                                Some(n) => {
495                                    let bullet_number = *n;
496                                    if options.increment_ordered_list_bullets {
497                                        *n += 1;
498                                    }
499                                    write!(formatter, "{}{} ", bullet_number, options.ordered_list_token)
500                                }
501                                None => write!(formatter, "{} ", options.list_token),
502                            }
503                        }
504                        None => Ok(()),
505                    }
506                }
507                Table(alignments) => {
508                    state.table_alignments = alignments.iter().map(From::from).collect();
509                    Ok(())
510                }
511                TableHead => Ok(()),
512                TableRow => Ok(()),
513                TableCell => {
514                    state.text_for_header = Some(String::new());
515                    state.in_table_cell = true;
516                    formatter.write_char('|')
517                }
518                Link {
519                    link_type,
520                    dest_url,
521                    title,
522                    id,
523                } => {
524                    state.link_stack.push(match link_type {
525                        LinkType::Autolink | LinkType::Email => {
526                            formatter.write_char('<')?;
527                            LinkCategory::AngleBracketed
528                        }
529                        LinkType::Reference => {
530                            formatter.write_char('[')?;
531                            LinkCategory::Reference {
532                                uri: dest_url.clone().into(),
533                                title: title.clone().into(),
534                                id: id.clone().into(),
535                            }
536                        }
537                        LinkType::Collapsed => {
538                            state.current_shortcut_text = Some(String::new());
539                            formatter.write_char('[')?;
540                            LinkCategory::Collapsed {
541                                uri: dest_url.clone().into(),
542                                title: title.clone().into(),
543                            }
544                        }
545                        LinkType::Shortcut => {
546                            state.current_shortcut_text = Some(String::new());
547                            formatter.write_char('[')?;
548                            LinkCategory::Shortcut {
549                                uri: dest_url.clone().into(),
550                                title: title.clone().into(),
551                            }
552                        }
553                        _ => {
554                            formatter.write_char('[')?;
555                            LinkCategory::Other {
556                                uri: dest_url.clone().into(),
557                                title: title.clone().into(),
558                            }
559                        }
560                    });
561                    Ok(())
562                }
563                Image {
564                    link_type,
565                    dest_url,
566                    title,
567                    id,
568                } => {
569                    state.image_stack.push(match link_type {
570                        LinkType::Reference => ImageLink::Reference {
571                            uri: dest_url.clone().into(),
572                            title: title.clone().into(),
573                            id: id.clone().into(),
574                        },
575                        LinkType::Collapsed => {
576                            state.current_shortcut_text = Some(String::new());
577                            ImageLink::Collapsed {
578                                uri: dest_url.clone().into(),
579                                title: title.clone().into(),
580                            }
581                        }
582                        LinkType::Shortcut => {
583                            state.current_shortcut_text = Some(String::new());
584                            ImageLink::Shortcut {
585                                uri: dest_url.clone().into(),
586                                title: title.clone().into(),
587                            }
588                        }
589                        _ => ImageLink::Other {
590                            uri: dest_url.clone().into(),
591                            title: title.clone().into(),
592                        },
593                    });
594                    formatter.write_str("![")
595                }
596                Emphasis => formatter.write_char(options.emphasis_token),
597                Strong => formatter.write_str(options.strong_token),
598                FootnoteDefinition(name) => {
599                    state.padding.push("    ".into());
600                    write!(formatter, "[^{name}]: ")
601                }
602                Paragraph => {
603                    state.last_was_paragraph_start = true;
604                    Ok(())
605                }
606                Heading {
607                    level,
608                    id,
609                    classes,
610                    attrs,
611                } => {
612                    if state.current_heading.is_some() {
613                        return Err(Error::UnexpectedEvent);
614                    }
615                    state.current_heading = Some(self::Heading {
616                        id: id.as_ref().map(|id| id.clone().into()),
617                        classes: classes.iter().map(|class| class.clone().into()).collect(),
618                        attributes: attrs
619                            .iter()
620                            .map(|(k, v)| (k.clone().into(), v.as_ref().map(|val| val.clone().into())))
621                            .collect(),
622                    });
623                    // Write '#', '##', '###', etc. based on the heading level.
624                    write!(formatter, "{} ", Repeated('#', *level as usize))
625                }
626                BlockQuote(kind) => {
627                    let every_line_padding = " > ";
628                    let first_line_padding = kind
629                        .map(|kind| match kind {
630                            BlockQuoteKind::Note => " > [!NOTE]",
631                            BlockQuoteKind::Tip => " > [!TIP]",
632                            BlockQuoteKind::Important => " > [!IMPORTANT]",
633                            BlockQuoteKind::Warning => " > [!WARNING]",
634                            BlockQuoteKind::Caution => " > [!CAUTION]",
635                        })
636                        .unwrap_or(every_line_padding);
637                    state.newlines_before_start = 1;
638
639                    // if we consumed some newlines, we know that we can just write out the next
640                    // level in our blockquote. This should work regardless if we have other
641                    // padding or if we're in a list
642                    if !consumed_newlines {
643                        write_padded_newline(formatter, state)?;
644                    }
645                    formatter.write_str(first_line_padding)?;
646                    state.padding.push(every_line_padding.into());
647                    Ok(())
648                }
649                CodeBlock(pulldown_cmark::CodeBlockKind::Indented) => {
650                    state.code_block = Some(CodeBlockKind::Indented);
651                    state.padding.push("    ".into());
652                    if consumed_newlines {
653                        formatter.write_str("    ")
654                    } else {
655                        write_padded_newline(formatter, &state)
656                    }
657                }
658                CodeBlock(pulldown_cmark::CodeBlockKind::Fenced(info)) => {
659                    state.code_block = Some(CodeBlockKind::Fenced);
660                    if !consumed_newlines {
661                        write_padded_newline(formatter, &state)?;
662                    }
663
664                    let fence = Repeated(options.code_block_token, options.code_block_token_count);
665                    write!(formatter, "{fence}{info}")?;
666                    write_padded_newline(formatter, &state)
667                }
668                HtmlBlock => Ok(()),
669                MetadataBlock(MetadataBlockKind::YamlStyle) => formatter.write_str("---\n"),
670                MetadataBlock(MetadataBlockKind::PlusesStyle) => formatter.write_str("+++\n"),
671                List(_) => Ok(()),
672                Strikethrough => formatter.write_str("~~"),
673                DefinitionList => Ok(()),
674                DefinitionListTitle => {
675                    state.set_minimum_newlines_before_start(options.newlines_after_rest);
676                    Ok(())
677                }
678                DefinitionListDefinition => {
679                    let every_line_padding = "  ";
680                    let first_line_padding = ": ";
681
682                    padding(formatter, &state.padding).and(formatter.write_str(first_line_padding))?;
683                    state.padding.push(every_line_padding.into());
684                    Ok(())
685                }
686                Superscript => formatter.write_str(if options.use_html_for_super_sub_script {
687                    "<sup>"
688                } else {
689                    "^"
690                }),
691                Subscript => formatter.write_str(if options.use_html_for_super_sub_script {
692                    "<sub>"
693                } else {
694                    "~"
695                }),
696            }
697        }
698        End(tag) => match tag {
699            TagEnd::Link => match if let Some(link_cat) = state.link_stack.pop() {
700                link_cat
701            } else {
702                return Err(Error::UnexpectedEvent);
703            } {
704                LinkCategory::AngleBracketed => formatter.write_char('>'),
705                LinkCategory::Reference { uri, title, id } => {
706                    state
707                        .shortcuts
708                        .push((id.to_string(), uri.to_string(), title.to_string()));
709                    formatter.write_str("][")?;
710                    formatter.write_str(&id)?;
711                    formatter.write_char(']')
712                }
713                LinkCategory::Collapsed { uri, title } => {
714                    if let Some(shortcut_text) = state.current_shortcut_text.take() {
715                        state
716                            .shortcuts
717                            .push((shortcut_text, uri.to_string(), title.to_string()));
718                    }
719                    formatter.write_str("][]")
720                }
721                LinkCategory::Shortcut { uri, title } => {
722                    if let Some(shortcut_text) = state.current_shortcut_text.take() {
723                        state
724                            .shortcuts
725                            .push((shortcut_text, uri.to_string(), title.to_string()));
726                    }
727                    formatter.write_char(']')
728                }
729                LinkCategory::Other { uri, title } => close_link(&uri, &title, formatter, LinkType::Inline),
730            },
731            TagEnd::Image => match if let Some(img_link) = state.image_stack.pop() {
732                img_link
733            } else {
734                return Err(Error::UnexpectedEvent);
735            } {
736                ImageLink::Reference { uri, title, id } => {
737                    state
738                        .shortcuts
739                        .push((id.to_string(), uri.to_string(), title.to_string()));
740                    formatter.write_str("][")?;
741                    formatter.write_str(&id)?;
742                    formatter.write_char(']')
743                }
744                ImageLink::Collapsed { uri, title } => {
745                    if let Some(shortcut_text) = state.current_shortcut_text.take() {
746                        state
747                            .shortcuts
748                            .push((shortcut_text, uri.to_string(), title.to_string()));
749                    }
750                    formatter.write_str("][]")
751                }
752                ImageLink::Shortcut { uri, title } => {
753                    if let Some(shortcut_text) = state.current_shortcut_text.take() {
754                        state
755                            .shortcuts
756                            .push((shortcut_text, uri.to_string(), title.to_string()));
757                    }
758                    formatter.write_char(']')
759                }
760                ImageLink::Other { uri, title } => {
761                    close_link(uri.as_ref(), title.as_ref(), formatter, LinkType::Inline)
762                }
763            },
764            TagEnd::Emphasis => formatter.write_char(options.emphasis_token),
765            TagEnd::Strong => formatter.write_str(options.strong_token),
766            TagEnd::Heading(_) => {
767                let Some(self::Heading {
768                    id,
769                    classes,
770                    attributes,
771                }) = state.current_heading.take()
772                else {
773                    return Err(Error::UnexpectedEvent);
774                };
775                let emit_braces = id.is_some() || !classes.is_empty() || !attributes.is_empty();
776                if emit_braces {
777                    formatter.write_str(" {")?;
778                }
779                if let Some(id_str) = id {
780                    formatter.write_char(' ')?;
781                    formatter.write_char('#')?;
782                    formatter.write_str(&id_str)?;
783                }
784                for class in &classes {
785                    formatter.write_char(' ')?;
786                    formatter.write_char('.')?;
787                    formatter.write_str(class)?;
788                }
789                for (key, val) in &attributes {
790                    formatter.write_char(' ')?;
791                    formatter.write_str(key)?;
792                    if let Some(val) = val {
793                        formatter.write_char('=')?;
794                        formatter.write_str(val)?;
795                    }
796                }
797                if emit_braces {
798                    formatter.write_char(' ')?;
799                    formatter.write_char('}')?;
800                }
801                state.set_minimum_newlines_before_start(options.newlines_after_headline);
802                Ok(())
803            }
804            TagEnd::Paragraph => {
805                state.set_minimum_newlines_before_start(options.newlines_after_paragraph);
806                Ok(())
807            }
808            TagEnd::CodeBlock => {
809                state.set_minimum_newlines_before_start(options.newlines_after_codeblock);
810                if last_was_text_without_trailing_newline {
811                    write_padded_newline(formatter, &state)?;
812                }
813                match state.code_block {
814                    Some(CodeBlockKind::Fenced) => {
815                        let fence = Repeated(options.code_block_token, options.code_block_token_count);
816                        write!(formatter, "{fence}")?;
817                    }
818                    Some(CodeBlockKind::Indented) => {
819                        state.padding.pop();
820                    }
821                    None => {}
822                }
823                state.code_block = None;
824                Ok(())
825            }
826            TagEnd::HtmlBlock => {
827                state.set_minimum_newlines_before_start(options.newlines_after_htmlblock);
828                Ok(())
829            }
830            TagEnd::MetadataBlock(MetadataBlockKind::PlusesStyle) => {
831                state.set_minimum_newlines_before_start(options.newlines_after_metadata);
832                formatter.write_str("+++\n")
833            }
834            TagEnd::MetadataBlock(MetadataBlockKind::YamlStyle) => {
835                state.set_minimum_newlines_before_start(options.newlines_after_metadata);
836                formatter.write_str("---\n")
837            }
838            TagEnd::Table => {
839                state.set_minimum_newlines_before_start(options.newlines_after_table);
840                state.table_alignments.clear();
841                state.table_headers.clear();
842                Ok(())
843            }
844            TagEnd::TableCell => {
845                state
846                    .table_headers
847                    .push(state.text_for_header.take().unwrap_or_default());
848                state.in_table_cell = false;
849                Ok(())
850            }
851            t @ (TagEnd::TableRow | TagEnd::TableHead) => {
852                state.set_minimum_newlines_before_start(options.newlines_after_rest);
853                formatter.write_char('|')?;
854
855                if let TagEnd::TableHead = t {
856                    write_padded_newline(formatter, &state)?;
857                    for (alignment, name) in state.table_alignments.iter().zip(state.table_headers.iter()) {
858                        formatter.write_char('|')?;
859                        // NOTE: For perfect counting, count grapheme clusters.
860                        // The reason this is not done is to avoid the dependency.
861
862                        // The minimum width of the column so that we can represent its alignment.
863                        let min_width = match alignment {
864                            // Must at least represent `-`.
865                            Alignment::None => 1,
866                            // Must at least represent `:-` or `-:`
867                            Alignment::Left | Alignment::Right => 2,
868                            // Must at least represent `:-:`
869                            Alignment::Center => 3,
870                        };
871                        let length = name.chars().count().max(min_width);
872                        let last_minus_one = length.saturating_sub(1);
873                        for c in 0..length {
874                            formatter.write_char(
875                                if (c == 0 && (alignment == &Alignment::Center || alignment == &Alignment::Left))
876                                    || (c == last_minus_one
877                                        && (alignment == &Alignment::Center || alignment == &Alignment::Right))
878                                {
879                                    ':'
880                                } else {
881                                    '-'
882                                },
883                            )?;
884                        }
885                    }
886                    formatter.write_char('|')?;
887                }
888                Ok(())
889            }
890            TagEnd::Item => {
891                state.padding.pop();
892                state.set_minimum_newlines_before_start(options.newlines_after_rest);
893                Ok(())
894            }
895            TagEnd::List(_) => {
896                state.list_stack.pop();
897                if state.list_stack.is_empty() {
898                    state.set_minimum_newlines_before_start(options.newlines_after_list);
899                }
900                Ok(())
901            }
902            TagEnd::BlockQuote(_) => {
903                state.padding.pop();
904
905                state.set_minimum_newlines_before_start(options.newlines_after_blockquote);
906
907                Ok(())
908            }
909            TagEnd::FootnoteDefinition => {
910                state.padding.pop();
911                Ok(())
912            }
913            TagEnd::Strikethrough => formatter.write_str("~~"),
914            TagEnd::DefinitionList => {
915                state.set_minimum_newlines_before_start(options.newlines_after_list);
916                Ok(())
917            }
918            TagEnd::DefinitionListTitle => formatter.write_char('\n'),
919            TagEnd::DefinitionListDefinition => {
920                state.padding.pop();
921                write_padded_newline(formatter, &state)
922            }
923            TagEnd::Superscript => formatter.write_str(if options.use_html_for_super_sub_script {
924                "</sup>"
925            } else {
926                "^"
927            }),
928            TagEnd::Subscript => formatter.write_str(if options.use_html_for_super_sub_script {
929                "</sub>"
930            } else {
931                "~"
932            }),
933        },
934        HardBreak => formatter.write_str("  ").and(write_padded_newline(formatter, &state)),
935        SoftBreak => write_padded_newline(formatter, &state),
936        Text(text) => {
937            let mut text = &text[..];
938            if let Some(shortcut_text) = state.current_shortcut_text.as_mut() {
939                shortcut_text.push_str(text);
940            }
941            if let Some(text_for_header) = state.text_for_header.as_mut() {
942                text_for_header.push_str(text);
943            }
944            consume_newlines(formatter, state)?;
945            if last_was_paragraph_start {
946                if text.starts_with('\t') {
947                    formatter.write_str("&#9;")?;
948                    text = &text[1..];
949                } else if text.starts_with(' ') {
950                    formatter.write_str("&#32;")?;
951                    text = &text[1..];
952                }
953            }
954            state.last_was_text_without_trailing_newline = !text.ends_with('\n');
955            let escaped_text = escape_special_characters(text, state, options);
956            print_text_without_trailing_newline(&escaped_text, formatter, &state)
957        }
958        InlineHtml(text) => {
959            consume_newlines(formatter, state)?;
960            print_text_without_trailing_newline(text, formatter, &state)
961        }
962        Html(text) => {
963            let mut lines = text.split('\n');
964            if let Some(line) = lines.next() {
965                formatter.write_str(line)?;
966            }
967            for line in lines {
968                write_padded_newline(formatter, &state)?;
969                formatter.write_str(line)?;
970            }
971            Ok(())
972        }
973        FootnoteReference(name) => write!(formatter, "[^{name}]"),
974        TaskListMarker(checked) => {
975            let check = if *checked { "x" } else { " " };
976            write!(formatter, "[{check}] ")
977        }
978        InlineMath(text) => write!(formatter, "${text}$"),
979        DisplayMath(text) => write!(formatter, "$${text}$$"),
980    };
981
982    Ok(res?)
983}
984
985impl State<'_> {
986    /// Finalize the serialization state by writing any remaining shortcuts.
987    ///
988    /// This should be called after all events have been processed to ensure
989    /// reference-style links are written at the end of the document.
990    pub fn finalize<F>(mut self, mut formatter: F) -> Result<Self, Error>
991    where
992        F: fmt::Write,
993    {
994        if self.shortcuts.is_empty() {
995            return Ok(self);
996        }
997
998        formatter.write_str("\n")?;
999        let mut written_shortcuts = HashSet::new();
1000        for shortcut in self.shortcuts.drain(..) {
1001            if written_shortcuts.contains(&shortcut) {
1002                continue;
1003            }
1004            write!(formatter, "\n[{}", shortcut.0)?;
1005            close_link(&shortcut.1, &shortcut.2, &mut formatter, LinkType::Shortcut)?;
1006            written_shortcuts.insert(shortcut);
1007        }
1008        Ok(self)
1009    }
1010
1011    /// Returns `true` if currently serializing content inside a code block.
1012    pub fn is_in_code_block(&self) -> bool {
1013        self.code_block.is_some()
1014    }
1015
1016    /// Ensure that [`State::newlines_before_start`] is at least as large as
1017    /// the provided option value.
1018    fn set_minimum_newlines_before_start(&mut self, option_value: usize) {
1019        if self.newlines_before_start < option_value {
1020            self.newlines_before_start = option_value
1021        }
1022    }
1023}
1024
1025/// Return the `<seen amount of consecutive fenced code-block tokens> + 1` that occur *within* a
1026/// fenced code-block `events`.
1027///
1028/// Use this function to obtain the correct value for `code_block_token_count` field of [`Options`]
1029/// to assure that the enclosing code-blocks remain functional as such.
1030///
1031/// Returns `None` if `events` didn't include any code-block, or the code-block didn't contain
1032/// a nested block. In that case, the correct amount of fenced code-block tokens is
1033/// [`DEFAULT_CODE_BLOCK_TOKEN_COUNT`].
1034///
1035/// ```rust
1036/// use pulldown_cmark::Event;
1037/// use pulldown_cmark_to_cmark::*;
1038///
1039/// let events = &[Event::Text("text".into())];
1040/// let code_block_token_count = calculate_code_block_token_count(events).unwrap_or(DEFAULT_CODE_BLOCK_TOKEN_COUNT);
1041/// let options = Options {
1042///     code_block_token_count,
1043///     ..Default::default()
1044/// };
1045/// let mut buf = String::new();
1046/// cmark_with_options(events.iter(), &mut buf, options);
1047/// ```
1048pub fn calculate_code_block_token_count<'a, I, E>(events: I) -> Option<usize>
1049where
1050    I: IntoIterator<Item = E>,
1051    E: Borrow<Event<'a>>,
1052{
1053    let mut in_codeblock = false;
1054    let mut max_token_count = 0;
1055
1056    // token_count should be taken over Text events
1057    // because a continuous text may be splitted to some Text events.
1058    let mut token_count = 0;
1059    let mut prev_token_char = None;
1060    for event in events {
1061        match event.borrow() {
1062            Event::Start(Tag::CodeBlock(_)) => {
1063                in_codeblock = true;
1064            }
1065            Event::End(TagEnd::CodeBlock) => {
1066                in_codeblock = false;
1067                prev_token_char = None;
1068            }
1069            Event::Text(x) if in_codeblock => {
1070                for c in x.chars() {
1071                    let prev_token = prev_token_char.take();
1072                    if c == '`' || c == '~' {
1073                        prev_token_char = Some(c);
1074                        if Some(c) == prev_token {
1075                            token_count += 1;
1076                        } else {
1077                            max_token_count = max_token_count.max(token_count);
1078                            token_count = 1;
1079                        }
1080                    }
1081                }
1082            }
1083            _ => prev_token_char = None,
1084        }
1085    }
1086
1087    max_token_count = max_token_count.max(token_count);
1088    (max_token_count >= 3).then_some(max_token_count + 1)
1089}