cmark_writer/
writer.rs

1//! CommonMark writer implementation.
2//!
3//! This module provides functionality to convert AST nodes to CommonMark format text.
//! The main component is the [`CommonMarkWriter`] struct, which serializes AST nodes to CommonMark-compliant text.
5
6use crate::ast::{Alignment, ListItem, Node};
7use std::{
8    cmp::max,
9    fmt::{self},
10};
11
/// Formatting options accepted by `CommonMarkWriter::with_options`.
#[derive(Debug, Clone)]
pub struct WriterOptions {
    /// Enables strict mode, i.e. strict adherence to the CommonMark specification.
    pub strict: bool,
    /// Selects the hard-break style: `true` writes two spaces and a newline,
    /// `false` writes a backslash and a newline.
    pub hard_break_spaces: bool,
    /// How many spaces make up one indentation level.
    pub indent_spaces: usize,
}
22
23impl Default for WriterOptions {
24    fn default() -> Self {
25        Self {
26            strict: true,
27            hard_break_spaces: true,
28            indent_spaces: 4,
29        }
30    }
31}
32
33/// CommonMark writer
34///
35/// This struct is responsible for serializing AST nodes to CommonMark-compliant text.
36#[derive(Debug)]
37pub struct CommonMarkWriter {
38    options: WriterOptions,
39    buffer: String,
40    /// Current indentation level
41    indent_level: usize,
42}
43
44impl CommonMarkWriter {
45    /// Create a new CommonMark writer with default options
46    ///
47    /// # Example
48    ///
49    /// ```
50    /// use cmark_writer::writer::CommonMarkWriter;
51    /// use cmark_writer::ast::Node;
52    ///
53    /// let mut writer = CommonMarkWriter::new();
54    /// writer.write(&Node::Text("Hello".to_string())).unwrap();
55    /// assert_eq!(writer.into_string(), "Hello");
56    /// ```
57    pub fn new() -> Self {
58        Self::with_options(WriterOptions::default())
59    }
60
61    /// Create a new CommonMark writer with specified options
62    ///
63    /// # Parameters
64    ///
65    /// * `options` - Custom CommonMark formatting options
66    ///
67    /// # Example
68    ///
69    /// ```
70    /// use cmark_writer::writer::{CommonMarkWriter, WriterOptions};
71    ///
72    /// let options = WriterOptions {
73    ///     strict: true,
74    ///     hard_break_spaces: false,  // Use backslash for line breaks
75    ///     indent_spaces: 2,          // Use 2 spaces for indentation
76    /// };
77    /// let writer = CommonMarkWriter::with_options(options);
78    /// ```
79    pub fn with_options(options: WriterOptions) -> Self {
80        Self {
81            options,
82            buffer: String::new(),
83            indent_level: 0,
84        }
85    }
86
87    /// Write an AST node as CommonMark format
88    ///
89    /// # Parameters
90    ///
91    /// * `node` - The AST node to write
92    ///
93    /// # Returns
94    ///
95    /// If writing succeeds, returns `Ok(())`, otherwise returns `Err(fmt::Error)`
96    ///
97    /// # Example
98    ///
99    /// ```
100    /// use cmark_writer::writer::CommonMarkWriter;
101    /// use cmark_writer::ast::Node;
102    ///
103    /// let mut writer = CommonMarkWriter::new();
104    /// writer.write(&Node::Text("Hello".to_string())).unwrap();
105    /// ```
106    pub fn write(&mut self, node: &Node) -> fmt::Result {
107        match node {
108            Node::Document(children) => self.write_document(children),
109            Node::Heading { level, content } => self.write_heading(*level, content),
110            Node::Paragraph(content) => self.write_paragraph(content),
111            Node::BlockQuote(content) => self.write_blockquote(content),
112            Node::CodeBlock { language, content } => self.write_code_block(language, content),
113            Node::UnorderedList(items) => self.write_unordered_list(items),
114            Node::OrderedList { start, items } => self.write_ordered_list(*start, items),
115            Node::ThematicBreak => self.write_thematic_break(),
116            Node::Table {
117                headers,
118                rows,
119                alignments,
120            } => self.write_table(headers, rows, alignments),
121            Node::Link {
122                url,
123                title,
124                content,
125            } => self.write_link(url, title, content),
126            Node::Image { url, title, alt } => self.write_image(url, title, alt),
127            Node::Emphasis(content) => self.write_emphasis(content),
128            Node::Strong(content) => self.write_strong(content),
129            Node::Strike(content) => self.write_strike(content),
130            Node::InlineCode(content) => self.write_inline_code(content),
131            Node::Text(content) => self.write_text(content),
132            Node::Inline(content) => self.write_inline(content),
133            Node::Html(content) => self.write_html(content),
134            Node::HtmlElement(element) => self.write_html_element(element),
135            Node::SoftBreak => self.write_soft_break(),
136            Node::HardBreak => self.write_hard_break(),
137        }
138    }
139
140    /// Write a document node
141    fn write_document(&mut self, children: &[Node]) -> fmt::Result {
142        for (i, child) in children.iter().enumerate() {
143            self.write(child)?;
144            if i < children.len() - 1 {
145                self.write_str("\n\n")?;
146            }
147        }
148        Ok(())
149    }
150
151    /// Write a heading node
152    fn write_heading(&mut self, level: u8, content: &[Node]) -> fmt::Result {
153        if !(1..=6).contains(&level) {
154            return Err(fmt::Error);
155        }
156
157        for _ in 0..level {
158            self.write_char('#')?;
159        }
160        self.write_char(' ')?;
161
162        for (i, node) in content.iter().enumerate() {
163            self.write(node)?;
164            if i < content.len() - 1 && !matches!(node, Node::SoftBreak | Node::HardBreak) {
165                self.write_char(' ')?;
166            }
167        }
168
169        Ok(())
170    }
171    /// Write a paragraph node
172    fn write_paragraph(&mut self, content: &[Node]) -> fmt::Result {
173        let mut prev_is_inline = false;
174
175        for (i, node) in content.iter().enumerate() {
176            // Check if the current node is an inline element
177            let is_inline = self.is_inline_element(node);
178
179            // If both current and previous nodes are inline elements, and it's not the first element,
180            // ensure there's no line break between them
181            if prev_is_inline
182                && is_inline
183                && i > 0
184                && !matches!(node, Node::SoftBreak | Node::HardBreak)
185            {
186                // Don't add extra whitespace to prevent incorrect line breaks
187            } else if i > 0 {
188                // Non-consecutive inline elements, add normal line break and indentation
189                self.write_char('\n')?;
190                // Add appropriate indentation (current indent level)
191                for _ in 0..(self.indent_level * self.options.indent_spaces) {
192                    self.write_char(' ')?;
193                }
194            }
195
196            self.write(node)?;
197            prev_is_inline = is_inline;
198        }
199        Ok(())
200    }
201
202    /// Write a blockquote node
203    fn write_blockquote(&mut self, content: &[Node]) -> fmt::Result {
204        self.indent_level += 1;
205
206        for (i, node) in content.iter().enumerate() {
207            self.write_str("> ")?;
208            self.write(node)?;
209            if i < content.len() - 1 {
210                self.write_str("\n> \n")?;
211            }
212        }
213
214        self.indent_level -= 1;
215        Ok(())
216    }
217
218    /// Write a code block node
219    fn write_code_block(&mut self, language: &Option<String>, content: &str) -> fmt::Result {
220        let mut max_backticks = 0;
221        let mut current = 0;
222        for c in content.chars() {
223            if c == '`' {
224                current += 1;
225                if current > max_backticks {
226                    max_backticks = current;
227                }
228            } else {
229                current = 0;
230            }
231        }
232        let fence_len = max(max_backticks + 1, 3);
233        let fence = "`".repeat(fence_len);
234
235        self.write_str(&fence)?;
236        if let Some(lang) = language {
237            self.write_str(lang)?;
238        }
239        self.write_char('\n')?;
240        self.write_str(content)?;
241
242        // Ensure content ends with a newline
243        if !content.ends_with('\n') {
244            self.write_char('\n')?;
245        }
246
247        self.write_str(&fence)?;
248        Ok(())
249    }
250
251    /// Write an unordered list node
252    fn write_unordered_list(&mut self, items: &[ListItem]) -> fmt::Result {
253        for (i, item) in items.iter().enumerate() {
254            self.write_list_item(item, "- ")?;
255            if i < items.len() - 1 {
256                self.write_char('\n')?;
257            }
258        }
259        Ok(())
260    }
261
262    /// Write an ordered list node
263    fn write_ordered_list(&mut self, start: u32, items: &[ListItem]) -> fmt::Result {
264        for (i, item) in items.iter().enumerate() {
265            let num = start as usize + i;
266            let prefix = format!("{}. ", num);
267            self.write_list_item(item, &prefix)?;
268            if i < items.len() - 1 {
269                self.write_char('\n')?;
270            }
271        }
272        Ok(())
273    }
274
275    /// Write a list item
276    fn write_list_item(&mut self, item: &ListItem, prefix: &str) -> fmt::Result {
277        // Apply indentation based on current level
278        for _ in 0..(self.indent_level * self.options.indent_spaces) {
279            self.write_char(' ')?;
280        }
281        self.write_str(prefix)?;
282
283        if item.is_task {
284            if item.task_completed {
285                self.write_str("[x] ")?;
286            } else {
287                self.write_str("[ ] ")?;
288            }
289        }
290
291        self.indent_level += 1;
292
293        // Track whether the previous element was an inline element
294        let mut prev_is_inline = false;
295
296        for (i, node) in item.content.iter().enumerate() {
297            // Determine if the current node is an inline element
298            let is_inline = self.is_inline_element(node);
299            let is_list = matches!(node, Node::OrderedList { .. } | Node::UnorderedList(..));
300
301            // Nested lists need special line break handling
302            if is_list {
303                if i > 0 {
304                    self.write_char('\n')?;
305                }
306                self.write(node)?;
307                prev_is_inline = false;
308                continue;
309            }
310
311            // If both previous and current are inline elements, prevent incorrect line breaks
312            if prev_is_inline && is_inline {
313                // Don't add extra separators to prevent incorrect line breaks for inline elements
314            } else if i > 0 {
315                // Non-consecutive inline elements, add normal line break and indentation
316                self.write_char('\n')?;
317                // Add appropriate indentation (list item prefix length + current indent level)
318                let prefix_length = prefix.len() + if item.is_task { 4 } else { 0 };
319                for _ in 0..(self.indent_level * self.options.indent_spaces) + prefix_length {
320                    self.write_char(' ')?;
321                }
322            }
323
324            self.write(node)?;
325            prev_is_inline = is_inline;
326        }
327
328        self.indent_level -= 1;
329        Ok(())
330    }
331
332    /// Write a thematic break (horizontal rule)
333    fn write_thematic_break(&mut self) -> fmt::Result {
334        self.write_str("---")
335    }
336
337    /// Check if the node contains a newline character and return an error if it does
338    fn check_no_newline(&self, node: &Node) -> fmt::Result {
339        if Self::node_contains_newline(node) {
340            return Err(fmt::Error);
341        }
342        Ok(())
343    }
344
345    /// Check if the node contains a newline character recursively
346    fn node_contains_newline(node: &Node) -> bool {
347        match node {
348            Node::Text(s) | Node::InlineCode(s) | Node::Html(s) => s.contains('\n'),
349            Node::Emphasis(children) | Node::Strong(children) | Node::Strike(children) => {
350                children.iter().any(Self::node_contains_newline)
351            }
352            Node::HtmlElement(element) => element.children.iter().any(Self::node_contains_newline),
353            Node::Link { content, .. } => content.iter().any(Self::node_contains_newline),
354            Node::Image { alt, .. } => alt.contains('\n'),
355            _ => false,
356        }
357    }
358
359    /// Write a table
360    fn write_table(
361        &mut self,
362        headers: &[Node],
363        rows: &[Vec<Node>],
364        alignments: &[Alignment],
365    ) -> fmt::Result {
366        // Write header
367        self.write_char('|')?;
368        for header in headers {
369            self.check_no_newline(header)?;
370            self.write_char(' ')?;
371            self.write(header)?;
372            self.write_str(" |")?;
373        }
374        self.write_char('\n')?;
375
376        // Write alignment row
377        self.write_char('|')?;
378        for alignment in alignments {
379            match alignment {
380                Alignment::None => self.write_str(" --- |")?,
381                Alignment::Left => self.write_str(" :--- |")?,
382                Alignment::Center => self.write_str(" :---: |")?,
383                Alignment::Right => self.write_str(" ---: |")?,
384            }
385        }
386        self.write_char('\n')?;
387
388        // Write table content
389        for row in rows {
390            self.write_char('|')?;
391            for cell in row {
392                self.check_no_newline(cell)?;
393                self.write_char(' ')?;
394                self.write(cell)?;
395                self.write_str(" |")?;
396            }
397            self.write_char('\n')?;
398        }
399
400        Ok(())
401    }
402
403    /// Write a link
404    fn write_link(&mut self, url: &str, title: &Option<String>, content: &[Node]) -> fmt::Result {
405        for node in content {
406            self.check_no_newline(node)?;
407        }
408        self.write_char('[')?;
409
410        for node in content {
411            self.write(node)?;
412        }
413
414        self.write_str("](")?;
415        self.write_str(url)?;
416
417        if let Some(title_text) = title {
418            self.write_str(" \"")?;
419            self.write_str(title_text)?;
420            self.write_char('"')?;
421        }
422
423        self.write_char(')')
424    }
425
426    /// Write an image
427    fn write_image(&mut self, url: &str, title: &Option<String>, alt: &str) -> fmt::Result {
428        self.check_no_newline(&Node::Text(alt.to_string()))?;
429        self.write_str("![")?;
430        self.write_str(alt)?;
431        self.write_str("](")?;
432        self.write_str(url)?;
433
434        if let Some(title_text) = title {
435            self.write_str(" \"")?;
436            self.write_str(title_text)?;
437            self.write_char('"')?;
438        }
439
440        self.write_char(')')
441    }
442
443    /// Write emphasis (italic)
444    fn write_emphasis(&mut self, content: &[Node]) -> fmt::Result {
445        for node in content {
446            self.check_no_newline(node)?;
447        }
448        self.write_char('*')?;
449
450        for node in content {
451            self.write(node)?;
452        }
453
454        self.write_char('*')
455    }
456
457    /// Write strong emphasis (bold)
458    fn write_strong(&mut self, content: &[Node]) -> fmt::Result {
459        for node in content {
460            self.check_no_newline(node)?;
461        }
462        self.write_str("**")?;
463
464        for node in content {
465            self.write(node)?;
466        }
467
468        self.write_str("**")
469    }
470
471    /// Write a strikethrough text
472    fn write_strike(&mut self, content: &[Node]) -> fmt::Result {
473        for node in content {
474            self.check_no_newline(node)?;
475        }
476        self.write_str("~~")?;
477
478        for node in content {
479            self.write(node)?;
480        }
481
482        self.write_str("~~")
483    }
484
485    /// Write inline code
486    fn write_inline_code(&mut self, content: &str) -> fmt::Result {
487        self.check_no_newline(&Node::InlineCode(content.to_string()))?;
488        self.write_char('`')?;
489        self.write_str(content)?;
490        self.write_char('`')
491    }
492
493    /// Write plain text
494    fn write_text(&mut self, content: &str) -> fmt::Result {
495        self.check_no_newline(&Node::Text(content.to_string()))?;
496        // Escape special characters
497        let escaped = content
498            .replace('\\', "\\\\")
499            .replace('*', "\\*")
500            .replace('_', "\\_")
501            .replace('[', "\\[")
502            .replace(']', "\\]")
503            .replace('<', "\\<")
504            .replace('>', "\\>")
505            .replace('`', "\\`");
506
507        self.write_str(&escaped)
508    }
509
510    /// Write an HTML element with attributes and children
511    fn write_html_element(&mut self, element: &crate::ast::HtmlElement) -> fmt::Result {
512        self.write_char('<')?;
513        self.write_str(&element.tag)?;
514
515        // Write attributes
516        for attr in &element.attributes {
517            self.write_char(' ')?;
518            self.write_str(&attr.name)?;
519            self.write_str("=\"")?;
520            // Escape quotes in attribute values
521            let escaped_value = attr.value.replace('"', "&quot;");
522            self.write_str(&escaped_value)?;
523            self.write_char('"')?;
524        }
525
526        if element.self_closing {
527            // Self-closing tag like <img />
528            self.write_str(" />")?;
529            return Ok(());
530        }
531
532        self.write_char('>')?;
533
534        // Process children
535        for child in &element.children {
536            self.write(child)?;
537        }
538
539        // Close tag
540        self.write_str("</")?;
541        self.write_str(&element.tag)?;
542        self.write_char('>')?;
543
544        Ok(())
545    }
546
547    /// Write HTML
548    fn write_html(&mut self, content: &str) -> fmt::Result {
549        self.write_str(content)
550    }
551
552    /// Write into inline container
553    fn write_inline(&mut self, content: &[Node]) -> fmt::Result {
554        for node in content {
555            self.check_no_newline(node)?;
556        }
557        for node in content.iter() {
558            self.write(node)?;
559        }
560        Ok(())
561    }
562
563    /// Write a soft line break
564    fn write_soft_break(&mut self) -> fmt::Result {
565        self.write_char('\n')
566    }
567
568    /// Write a hard line break
569    fn write_hard_break(&mut self) -> fmt::Result {
570        if self.options.hard_break_spaces {
571            self.write_str("  \n")
572        } else {
573            self.write_str("\\\n")
574        }
575    }
576
577    /// Check if a node is an inline element that shouldn't be broken across lines
578    fn is_inline_element(&self, node: &Node) -> bool {
579        matches!(
580            node,
581            Node::Text(_)
582                | Node::Emphasis(_)
583                | Node::Strong(_)
584                | Node::Strike(_)
585                | Node::InlineCode(_)
586                | Node::Link { .. }
587                | Node::Image { .. }
588                | Node::HtmlElement(_)
589        )
590    }
591
592    /// Get the generated CommonMark format text
593    ///
594    /// Consumes the writer and returns the generated string
595    ///
596    /// # Example
597    ///
598    /// ```
599    /// use cmark_writer::writer::CommonMarkWriter;
600    /// use cmark_writer::ast::Node;
601    ///
602    /// let mut writer = CommonMarkWriter::new();
603    /// writer.write(&Node::Text("Hello".to_string())).unwrap();
604    /// let result = writer.into_string();
605    /// assert_eq!(result, "Hello");
606    /// ```
607    pub fn into_string(self) -> String {
608        self.buffer
609    }
610
611    /// Write a character to the buffer
612    fn write_char(&mut self, c: char) -> fmt::Result {
613        self.buffer.push(c);
614        Ok(())
615    }
616
617    /// Write a string to the buffer
618    fn write_str(&mut self, s: &str) -> fmt::Result {
619        self.buffer.push_str(s);
620        Ok(())
621    }
622}
623
624impl Default for CommonMarkWriter {
625    fn default() -> Self {
626        Self::new()
627    }
628}
629
630// Implement Display trait
631impl fmt::Display for Node {
632    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
633        let mut writer = CommonMarkWriter::new();
634        writer.write(self)?;
635        write!(f, "{}", writer.into_string())
636    }
637}