cmarkfmt/
lib.rs

1//! # cmarkfmt
2//!
3//! A library for formatting CommonMark files.
4//!
5//! ## Usage
6//!
7//! ```
8//! let input = r#"# This is markdown
9//! It *needs* to be formatted."#;
10//!
11//! let cmfmt = cmarkfmt::Formatter::default();
12//! let output = cmfmt.format_cmark(input);
13//! println!("{output}");
14//! ```
15
16use std::fmt::{self, Debug, Write};
17
18use pulldown_cmark::{
19    Alignment, CodeBlockKind, Event, HeadingLevel, LinkType, Options as POptions, Parser, Tag,
20};
21
/// Function for formatting code blocks within markdown.
///
/// The first parameter is the language (the info string that follows the
/// opening fence, which may be empty), and the second parameter is the code
/// itself. If formatted, returns `Some(String)` with the code block to use.
/// Returning `None` keeps the code exactly as it was parsed.
///
/// The function is only called for fenced code blocks; indented code blocks
/// are never passed to it.
///
/// Example:
///
/// ```
/// let input = r#"
/// \`\`\`json
/// {
///   "key": "value"
/// }
/// \`\`\`"#;
///
/// let cmfmt = cmarkfmt::Formatter::default()
///     .with_code_formatter(Some(&|lang, code| {
///         if lang == "json" {
///             Some(code.to_string())
///         } else {
///             None
///         }
///     }));
///
/// let output = cmfmt.format_cmark(input);
/// println!("{output}");
/// ```
pub type CodeFormatFn<'a> = &'a dyn Fn(&str, &str) -> Option<String>;
50
/// A `Formatter` is needed to format markdown. It is created and customized as
/// needed using the `with_*` methods.
///
/// Once created, the `format_cmark` or `format_cmark_writer` methods can be
/// used.
#[derive(Clone)]
pub struct Formatter<'a> {
    /// Optional callback used to reformat the contents of fenced code blocks.
    code_fmt: Option<CodeFormatFn<'a>>,
    /// Marker written (followed by a space) at the start of every line inside
    /// a block quote.
    blockquote: &'a str,
    /// Delimiter written before and after emphasized text.
    emphasis: &'a str,
    /// Marker written (followed by a space) in front of unordered list items.
    unordered_list: &'a str,
}
63
64impl Default for Formatter<'_> {
65    fn default() -> Self {
66        Self {
67            code_fmt: None,
68            blockquote: ">",
69            emphasis: "_",
70            unordered_list: "-",
71        }
72    }
73}
74
75impl Debug for Formatter<'_> {
76    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
77        f.debug_struct("FormatBuilder")
78            .field("code_fmt", &self.code_fmt.map(|_| ()))
79            .field("blockquote", &self.blockquote)
80            .field("emphasis", &self.emphasis)
81            .field("unordered_list", &self.unordered_list)
82            .finish()
83    }
84}
85
86impl<'a> Formatter<'a> {
87    /// Format markdown, returning the formatted result as a String.
88    pub fn format_cmark(&self, input: &str) -> String {
89        let mut out = String::with_capacity(input.len() + 128);
90        self.format_cmark_writer(input, &mut out).unwrap();
91        out
92    }
93
94    /// Format markdown, writing the result to the provided Writer.
95    pub fn format_cmark_writer<W: fmt::Write>(&self, input: &str, w: W) -> fmt::Result {
96        let mut opts = POptions::all();
97        opts.remove(POptions::ENABLE_SMART_PUNCTUATION);
98        let parser = Parser::new_ext(input, opts);
99
100        let mut refdefs = parser
101            .reference_definitions()
102            .iter()
103            .map(|(label, linkdef)| Reference {
104                label: label.to_owned(),
105                dest: linkdef.dest.to_string(),
106                title: linkdef.title.as_ref().map(|v| v.to_string()),
107            })
108            .collect::<Vec<_>>();
109        refdefs.sort_by(|r1, r2| r1.label.cmp(&r2.label));
110
111        let mut ctx = Context::new(w, refdefs, self.into());
112        ctx.format(parser)
113    }
114
115    /// Sets the `Formatter`s code formatter function. By default, code blocks
116    /// are not formatted.
117    pub fn with_code_formatter(self, code_fmt: Option<CodeFormatFn<'a>>) -> Self {
118        Formatter { code_fmt, ..self }
119    }
120
121    /// Sets the blockquote string. Default: ">".
122    pub fn with_blockquote(self, blockquote: &'a str) -> Self {
123        Formatter { blockquote, ..self }
124    }
125
126    /// Sets the emphasis string. Default: "_".
127    pub fn with_emphasis(self, emphasis: &'a str) -> Self {
128        Formatter { emphasis, ..self }
129    }
130
131    /// Sets the unordered list string. Default: "-".
132    pub fn with_unordered_list(self, unordered_list: &'a str) -> Self {
133        Formatter {
134            unordered_list,
135            ..self
136        }
137    }
138}
139
/// Delimiter written before and after strong (bold) text.
const STRONG: &str = "**";
/// Delimiter written before and after strikethrough text.
const STRIKETHROUGH: &str = "~~";
142
/// A container the formatter is currently inside of. Each entry on the stack
/// contributes a prefix to every line that is written.
enum StackItem {
    /// Inside a block quote: lines are prefixed with the block quote marker.
    Blockquote,
    /// Inside an indented code block: lines are prefixed with four spaces.
    CodeIndent,
    /// Inside a list.
    ///
    /// * Field 0: the list number as a string for ordered lists, `None` for
    ///   unordered lists.
    /// * Field 1 (`written`): whether the marker of the current item has
    ///   already been written; later lines of the item are indented instead.
    /// * Field 2 (`newline`): set once the current item has already ended a
    ///   line (paragraph, nested list or table), so that the end of the item
    ///   does not write another newline.
    List(Option<String>, bool, bool),
}
148
/// Borrowed view of a `Formatter`'s settings, used while formatting.
struct Options<'a> {
    /// Optional callback for reformatting fenced code blocks.
    code_fmt: &'a Option<CodeFormatFn<'a>>,
    /// Marker written at the start of block quote lines.
    blockquote_str: &'a str,
    /// Delimiter written around emphasized text.
    emphasis_str: &'a str,
    /// Marker written in front of unordered list items.
    unordered_list_str: &'a str,
}
155
156impl<'a> From<&'a Formatter<'a>> for Options<'a> {
157    fn from(v: &'a Formatter<'a>) -> Self {
158        Options {
159            code_fmt: &v.code_fmt,
160            blockquote_str: v.blockquote,
161            emphasis_str: v.emphasis,
162            unordered_list_str: v.unordered_list,
163        }
164    }
165}
166
/// Mutable state for a single formatting pass.
struct Context<'a, W: fmt::Write> {
    /// Destination that finished lines are written to.
    writer: W,
    /// Reference definitions; used when closing reference links and written
    /// out at the end of the document.
    refdefs: Vec<Reference>,
    /// Formatting settings borrowed from the `Formatter`.
    opts: Options<'a>,
    /// The table currently being collected, if inside one.
    table: Option<Table>,
    /// Enclosing containers (block quotes, lists, indented code) that
    /// determine the prefix of each line.
    stack: Vec<StackItem>,
    /// Text accumulated for the current line(s) and not yet written out.
    text_buf: String,
    /// Reusable buffer in which a single output line (prefix plus text) is
    /// assembled.
    scratch: String,
    /// When set, a newline is written before the next block starts.
    newline_required: bool,
    /// `None` outside code blocks, `Some(None)` inside an indented code block
    /// and `Some(Some(lang))` inside a fenced code block.
    code_block: Option<Option<String>>,
    /// Whether the last line handled was blank; used to avoid writing
    /// consecutive blank lines.
    last_line_blank: bool,
}
179
180impl<'a, W: fmt::Write> Context<'a, W> {
181    fn new(writer: W, refdefs: Vec<Reference>, opts: Options<'a>) -> Self {
182        Context {
183            writer,
184            refdefs,
185            opts,
186            table: None,
187            stack: Vec::new(),
188            scratch: String::with_capacity(512),
189            text_buf: String::with_capacity(512),
190            newline_required: false,
191            code_block: None,
192            last_line_blank: true,
193        }
194    }
195
196    fn format(&mut self, parser: Parser) -> fmt::Result {
197        let mut is_last_html = false;
198        for event in parser {
199            if is_last_html {
200                match event {
201                    Event::Html(_) | Event::Text(_) | Event::SoftBreak | Event::End(_) => {}
202                    _ => self.write_newline()?,
203                }
204                is_last_html = false;
205            }
206
207            match event {
208                Event::Start(tag) => self.tag_start(tag)?,
209                Event::End(tag) => self.tag_end(tag)?,
210                Event::Text(s) => {
211                    let out: String;
212                    let mut text: &str = &s;
213                    if let Some(Some(lang)) = &self.code_block {
214                        if let Some(code_fmt) = &self.opts.code_fmt {
215                            if let Some(v) = (code_fmt)(lang, &s) {
216                                out = v;
217                                text = &out;
218                            }
219                        }
220                    }
221                    self.write_optional_escape(text)?;
222                    self.write_str(text)?;
223                }
224                Event::Code(s) => {
225                    self.write_char('`')?;
226                    if let Some('`') = s.chars().next() {
227                        self.write_backslash()?;
228                    }
229                    self.write_str(&s)?;
230                    self.write_char('`')?;
231                }
232                Event::Html(s) => {
233                    if self.text_buf.is_empty() {
234                        self.write_newline_if_required()?;
235                    }
236                    self.write_str(&s)?;
237                    if s.ends_with('\n') {
238                        self.write_newline()?;
239                    }
240                    is_last_html = true;
241                }
242                Event::SoftBreak => self.write_newline()?,
243                Event::HardBreak => {
244                    //self.write_str("  ")?;
245                    self.write_char('\\')?;
246                    self.write_newline_with_trim(false)?;
247                }
248                Event::Rule => {
249                    if self.newline_required {
250                        self.write_newline()?;
251                    }
252                    self.write_str("---")?;
253                    self.write_newline()?;
254                    self.newline_required = true;
255                }
256                Event::TaskListMarker(is_checked) => {
257                    self.write_char('[')?;
258                    self.write_char(if is_checked { 'x' } else { ' ' })?;
259                    self.write_str("] ")?;
260                }
261                Event::FootnoteReference(label) => {
262                    self.write_str("[^")?;
263                    self.write_str(&label)?;
264                    self.write_char(']')?;
265                }
266            }
267        }
268
269        let refdefs = std::mem::take(&mut self.refdefs);
270        if !refdefs.is_empty() {
271            self.write_newline()?;
272            for refdef in refdefs {
273                self.write_char('[')?;
274                self.write_str(&refdef.label)?;
275                self.write_str("]: ")?;
276                self.write_str(&refdef.dest)?;
277                if let Some(title) = refdef.title {
278                    self.write_str(" \"")?;
279                    self.write_str(&title)?;
280                    self.write_char('"')?;
281                }
282                self.write_newline()?;
283            }
284        }
285
286        Ok(())
287    }
288
    /// Handles the start of a tag: writes any opening marker and updates the
    /// container stack and block state.
    ///
    /// # Errors
    ///
    /// Returns the error of the first write that fails.
    fn tag_start(&mut self, tag: Tag) -> fmt::Result {
        // Write the separator requested by the previous block, if any.
        self.write_newline_if_required()?;
        match tag {
            Tag::Heading(lvl, _, _) => self.write_heading_level(lvl)?,
            // From now on every line gets the block quote prefix.
            Tag::BlockQuote => self.stack.push(StackItem::Blockquote),
            Tag::CodeBlock(kind) => {
                // Flush pending text so the code block starts on its own line.
                if !self.text_buf.is_empty() {
                    self.write_newline()?;
                }
                match kind {
                    CodeBlockKind::Indented => {
                        self.code_block = Some(None);
                        self.stack.push(StackItem::CodeIndent)
                    }
                    CodeBlockKind::Fenced(s) => {
                        // Opening fence followed by the info string.
                        self.write_str("```")?;
                        self.write_str(&s)?;
                        self.write_newline()?;
                        // Remember the info string for the code formatter.
                        self.code_block = Some(Some(s.into_string()));
                    }
                }
            }
            Tag::List(l) => {
                // A list nested in a list item: finish the item's current line
                // and record that the item has already ended a line.
                if let Some(StackItem::List(_, _, newline)) = self.stack.last_mut() {
                    *newline = true;
                    self.write_newline()?;
                }
                // `l` is `Some` for ordered lists; it is stored as a string and
                // used as the marker of every item in the list.
                let l = l.map(|v| v.to_string());
                self.stack.push(StackItem::List(l, false, false));
            }
            Tag::Item => {
                // Reset the per-item flags so the next line written gets the
                // list marker.
                if let Some(StackItem::List(_, written, newline)) = self.stack.last_mut() {
                    *written = false;
                    *newline = false;
                }
            }
            Tag::FootnoteDefinition(value) => {
                self.write_str("[^")?;
                self.write_str(&value)?;
                self.write_str("]: ")?;
            }
            // Tables are collected completely and written when they end.
            Tag::Table(alignments) => self.table = Some(Table::new(alignments)),
            Tag::TableRow => {
                // Start a new body row; cells are appended as they end.
                if let Some(table) = self.table.as_mut() {
                    table.body.push(Vec::with_capacity(table.head.len()));
                }
            }
            Tag::Emphasis => self.write_str(self.opts.emphasis_str)?,
            Tag::Strong => self.write_str(STRONG)?,
            Tag::Strikethrough => self.write_str(STRIKETHROUGH)?,
            Tag::Link(typ, _, _) => match typ {
                LinkType::Autolink | LinkType::Email => self.write_char('<')?,
                _ => self.write_char('[')?,
            },
            Tag::Image(_, _, _) => self.write_str("![")?,
            Tag::Paragraph | Tag::TableHead | Tag::TableCell => {}
        }
        Ok(())
    }
348
    /// Handles the end of a tag: writes any closing marker, flushes finished
    /// lines and updates the container stack and block state.
    ///
    /// # Errors
    ///
    /// Returns the error of the first write that fails.
    fn tag_end(&mut self, tag: Tag) -> fmt::Result {
        match tag {
            Tag::Paragraph => {
                // Paragraphs directly inside a list do not request a separator
                // before the next block.
                if !matches!(self.stack.last(), Some(StackItem::List(..))) {
                    self.newline_required = true;
                }
                // The paragraph ends the item's line, so the end of the item
                // must not write another newline.
                if let Some(StackItem::List(_, _, newline)) = self.stack.last_mut() {
                    *newline = true;
                }
                self.write_newline_if_content()
            }
            Tag::Heading(_, id, classes) => {
                // Heading attributes are written as `{ #id .class }`.
                if id.is_some() || !classes.is_empty() {
                    self.write_char('{')?;
                    if let Some(id) = id {
                        self.write_str(" #")?;
                        self.write_str(id)?;
                    }
                    for class in classes {
                        self.write_str(" .")?;
                        self.write_str(class)?;
                    }
                    self.write_str(" }")?;
                }

                self.newline_required = true;
                self.write_newline()
            }
            Tag::BlockQuote => {
                // Stop prefixing lines with the block quote marker.
                self.stack.pop();
                if !matches!(self.stack.last(), Some(StackItem::List(..))) {
                    self.newline_required = true;
                }
                Ok(())
            }
            Tag::CodeBlock(kind) => {
                if let CodeBlockKind::Fenced(_) = kind {
                    self.write_str("```")?;
                }
                self.write_newline()?;
                // The indent entry is popped only after the last line of the
                // block has been written with it.
                if let CodeBlockKind::Indented = kind {
                    self.stack.pop();
                }
                self.newline_required = true;
                self.code_block = None;
                Ok(())
            }
            Tag::List(_) => {
                self.stack.pop();
                // Only the end of an outermost list requests a separator.
                if !self
                    .stack
                    .iter()
                    .any(|v| matches!(&v, StackItem::List(_, _, _)))
                {
                    self.newline_required = true;
                }
                Ok(())
            }
            Tag::Item => {
                // Finish the item's line unless something inside the item
                // (paragraph, nested list, table) has already done so.
                if let Some(StackItem::List(_, _, false)) = self.stack.last() {
                    self.write_newline_if_content()?;
                }
                Ok(())
            }
            Tag::Table(_) => {
                let table = match self.table.take() {
                    Some(table) => table,
                    None => return Ok(()),
                };
                let widths = table.column_widths();
                self.write_table_row(&table.head, &widths)?;

                // Delimiter row: a colon marks the aligned side(s) of a column.
                self.write_char('|')?;
                for (w, a) in widths.iter().zip(table.alignments.iter()) {
                    self.write_char(' ')?;
                    self.write_char(if matches!(a, Alignment::Left | Alignment::Center) {
                        ':'
                    } else {
                        '-'
                    })?;
                    // Column widths are at least 3 (see `Table::column_widths`),
                    // so this subtraction cannot underflow.
                    for _ in 0..*w - 2 {
                        self.write_char('-')?;
                    }
                    self.write_char(if matches!(a, Alignment::Right | Alignment::Center) {
                        ':'
                    } else {
                        '-'
                    })?;
                    self.write_str(" |")?;
                }
                self.write_newline()?;

                for b in &table.body {
                    self.write_table_row(b, &widths)?;
                }

                self.table = None;
                self.newline_required = true;

                // The table ends the item's line when it sits inside a list.
                if let Some(StackItem::List(_, _, newline)) = self.stack.last_mut() {
                    *newline = true;
                }

                Ok(())
            }
            Tag::TableCell => {
                // Move the buffered cell text into the table: into the current
                // body row if one has been started, otherwise into the header.
                if let Some(table) = self.table.as_mut() {
                    if let Some(b) = table.body.last_mut() {
                        b.push(self.text_buf.to_string());
                    } else {
                        table.head.push(self.text_buf.to_string());
                    }
                    self.text_buf.clear();
                }
                Ok(())
            }
            Tag::Emphasis => self.write_str(self.opts.emphasis_str),
            Tag::Strong => self.write_str(STRONG),
            Tag::Strikethrough => self.write_str(STRIKETHROUGH),
            Tag::Link(LinkType::Reference | LinkType::ReferenceUnknown, dest, title) => {
                // Move the definitions out of `self` for the duration of the
                // lookup so they can be read while `self` is written to.
                let refdefs = std::mem::take(&mut self.refdefs);
                // The definition is looked up by destination, ignoring ASCII
                // case; without a match the link is written inline.
                if let Some(refdef) = refdefs.iter().find(|v| dest.eq_ignore_ascii_case(&v.dest)) {
                    self.write_str("][")?;
                    self.write_str(&refdef.label)?;
                    self.write_char(']')?;
                } else {
                    self.write_str("](")?;
                    self.write_str(&dest)?;
                    if !title.is_empty() {
                        self.write_str(" \"")?;
                        self.write_str(&title)?;
                        self.write_char('"')?;
                    }
                    self.write_char(')')?;
                }
                self.refdefs = refdefs;
                Ok(())
            }
            Tag::Link(LinkType::Shortcut | LinkType::ShortcutUnknown, ..) => self.write_char(']'),
            Tag::Link(LinkType::Collapsed | LinkType::CollapsedUnknown, ..) => {
                self.write_str("][]")
            }
            Tag::Link(LinkType::Autolink | LinkType::Email, ..) => self.write_char('>'),
            // Remaining links and all images are written in inline form.
            Tag::Link(_, dest, title) | Tag::Image(_, dest, title) => {
                self.write_str("](")?;
                self.write_str(&dest)?;
                if !title.is_empty() {
                    self.write_str(" \"")?;
                    self.write_str(&title)?;
                    self.write_char('"')?;
                }
                self.write_char(')')
            }
            Tag::FootnoteDefinition(_) | Tag::TableHead | Tag::TableRow => Ok(()),
        }
    }
505
506    fn write_optional_escape(&mut self, s: &str) -> fmt::Result {
507        if self.code_block.is_some() {
508            if s.starts_with("```") {
509                self.write_backslash()?;
510            }
511            return Ok(());
512        }
513        if let Some(first) = s.chars().next() {
514            if self.table.is_some() && first == '|' {
515                return self.write_backslash();
516            }
517            match first {
518                '\\' | '<' | '>' | '*' | '_' | '`' | '[' | ']' | '~' => {
519                    return self.write_backslash()
520                }
521                '#' | '-' | '+' => {
522                    if self.text_buf.is_empty() {
523                        return self.write_backslash();
524                    }
525                }
526                _ => {}
527            }
528        }
529        Ok(())
530    }
531
532    fn write_backslash(&mut self) -> fmt::Result {
533        self.text_buf.write_char('\\')
534    }
535
536    fn write_table_row(&mut self, row: &[String], widths: &[usize]) -> fmt::Result {
537        self.write_str("|")?;
538        for (s, w) in row.iter().zip(widths.iter()) {
539            let width = s.chars().count();
540            self.write_char(' ')?;
541            self.write_str(s)?;
542            for _ in 0..(w - width) {
543                self.write_char(' ')?;
544            }
545            self.write_str(" |")?;
546        }
547        self.write_newline()
548    }
549
550    fn write_newline_if_required(&mut self) -> fmt::Result {
551        if self.newline_required {
552            self.write_newline()?;
553            self.newline_required = false;
554        }
555        Ok(())
556    }
557
558    fn write_newline_if_content(&mut self) -> fmt::Result {
559        if !self.text_buf.is_empty() || !self.stack.is_empty() {
560            self.write_newline()?;
561        }
562        Ok(())
563    }
564
565    fn write_heading_level(&mut self, lvl: HeadingLevel) -> fmt::Result {
566        match lvl {
567            HeadingLevel::H1 => self.write_str("# "),
568            HeadingLevel::H2 => self.write_str("## "),
569            HeadingLevel::H3 => self.write_str("### "),
570            HeadingLevel::H4 => self.write_str("#### "),
571            HeadingLevel::H5 => self.write_str("##### "),
572            HeadingLevel::H6 => self.write_str("###### "),
573        }
574    }
575
576    fn write_str(&mut self, s: &str) -> fmt::Result {
577        self.text_buf.write_str(s)
578    }
579
580    fn write_char(&mut self, c: char) -> fmt::Result {
581        self.text_buf.write_char(c)
582    }
583
584    fn write_newline(&mut self) -> fmt::Result {
585        self.write_newline_with_trim(true)
586    }
587
588    fn write_newline_with_trim(&mut self, trim: bool) -> fmt::Result {
589        if !self.text_buf.is_empty() {
590            let mut text_buf = std::mem::take(&mut self.text_buf);
591            for line in text_buf.lines() {
592                self.write_line(line, trim)?;
593            }
594            text_buf.clear();
595            self.text_buf = text_buf;
596        } else {
597            self.write_line("", trim)?;
598        }
599        Ok(())
600    }
601
602    fn write_line(&mut self, line: &str, trim: bool) -> fmt::Result {
603        self.write_padding_to_scratch()?;
604        self.scratch.write_str(line)?;
605        let buf = if trim {
606            self.scratch.trim_end()
607        } else {
608            &self.scratch
609        };
610        if !buf.is_empty() || !self.last_line_blank {
611            self.writer.write_str(buf)?;
612            self.writer.write_char('\n')?;
613        }
614        self.last_line_blank = buf.is_empty();
615        self.scratch.clear();
616        Ok(())
617    }
618
619    fn write_padding_to_scratch(&mut self) -> fmt::Result {
620        for item in self.stack.iter_mut() {
621            match item {
622                StackItem::Blockquote => {
623                    self.scratch.write_str(self.opts.blockquote_str)?;
624                    self.scratch.write_char(' ')?
625                }
626                StackItem::CodeIndent => self.scratch.write_str("    ")?,
627                StackItem::List(l, written, _) => {
628                    if *written {
629                        match l {
630                            None => {
631                                for _ in 0..self.opts.unordered_list_str.chars().count() + 1 {
632                                    self.scratch.write_char(' ')?;
633                                }
634                            }
635                            Some(n) => {
636                                for _ in 0..n.chars().count() + 2 {
637                                    self.scratch.write_char(' ')?;
638                                }
639                            }
640                        }
641                    } else {
642                        *written = true;
643                        match l {
644                            None => {
645                                self.scratch.write_str(self.opts.unordered_list_str)?;
646                                self.scratch.write_char(' ')?
647                            }
648                            Some(n) => {
649                                self.scratch.write_str(n)?;
650                                self.scratch.write_str(". ")?
651                            }
652                        }
653                    }
654                }
655            }
656        }
657        Ok(())
658    }
659}
660
/// A table whose cells are collected while it is being parsed, so that the
/// column widths are known before anything is written.
struct Table {
    /// Alignment of each column.
    alignments: Vec<Alignment>,
    /// Text of the header cells.
    head: Vec<String>,
    /// Text of the body cells, one inner vector per row.
    body: Vec<Vec<String>>,
}
666
667impl Table {
668    fn new(alignments: Vec<Alignment>) -> Self {
669        Table {
670            alignments,
671            head: Vec::new(),
672            body: Vec::new(),
673        }
674    }
675
676    fn column_widths(&self) -> Vec<usize> {
677        self.head
678            .iter()
679            .enumerate()
680            .map(|(i, h)| {
681                self.body
682                    .iter()
683                    .map(|b| b.get(i).map(|b| b.chars().count()).unwrap_or(0))
684                    .max()
685                    .unwrap_or_default()
686                    .max(h.chars().count())
687                    .max(3)
688            })
689            .collect()
690    }
691}
692
/// A link reference definition, written as `[label]: dest "title"`.
struct Reference {
    /// Label the definition is referred to by.
    label: String,
    /// Link destination.
    dest: String,
    /// Optional link title.
    title: Option<String>,
}