md_formatter/
formatter.rs

1use pulldown_cmark::{CowStr, Event, Tag};
2use std::str::FromStr;
3
/// How to handle prose wrapping.
///
/// The default is [`WrapMode::Preserve`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum WrapMode {
    /// Wrap prose if it exceeds the print width; soft breaks from the source
    /// are treated as ordinary spaces.
    Always,
    /// Un-wrap each block of prose into one line; soft breaks from the source
    /// are treated as ordinary spaces.
    Never,
    /// Do nothing, leave prose line breaks as they were in the source (default)
    #[default]
    Preserve,
}
15
16impl FromStr for WrapMode {
17    type Err = String;
18
19    fn from_str(s: &str) -> Result<Self, Self::Err> {
20        match s.to_lowercase().as_str() {
21            "always" => Ok(Self::Always),
22            "never" => Ok(Self::Never),
23            "preserve" => Ok(Self::Preserve),
24            _ => Err(format!(
25                "Invalid wrap mode: '{}'. Expected: always, never, preserve",
26                s
27            )),
28        }
29    }
30}
31
/// How to handle ordered list numbering.
///
/// The default is [`OrderedListMode::Ascending`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum OrderedListMode {
    /// Renumber items sequentially (1, 2, 3, ...) - default
    #[default]
    Ascending,
    /// Use 1. for all items
    One,
    // Note: a "preserve" mode is not currently possible because pulldown-cmark
    // doesn't provide the original number of every item in the event stream
    // (the list start tag only carries the number of the first item).
}
43
44impl FromStr for OrderedListMode {
45    type Err = String;
46
47    fn from_str(s: &str) -> Result<Self, Self::Err> {
48        match s.to_lowercase().as_str() {
49            "ascending" => Ok(Self::Ascending),
50            "one" => Ok(Self::One),
51            _ => Err(format!(
52                "Invalid ordered list mode: '{}'. Expected: ascending, one",
53                s
54            )),
55        }
56    }
57}
58
/// Represents an inline element that can be buffered before wrapping.
///
/// Elements are collected in document order and later rendered back to
/// Markdown text in one pass, so that wrapping can see a whole run of inline
/// content at once.
#[derive(Debug, Clone)]
enum InlineElement {
    /// Regular text content
    Text(String),
    /// Inline code (`code`); holds the code without its backtick delimiters
    Code(String),
    /// Start of emphasis (*)
    EmphasisStart,
    /// End of emphasis (*)
    EmphasisEnd,
    /// Start of strong (**)
    StrongStart,
    /// End of strong (**)
    StrongEnd,
    /// Start of strikethrough (~~)
    StrikethroughStart,
    /// End of strikethrough (~~)
    StrikethroughEnd,
    /// Start of link ([)
    LinkStart,
    /// End of link; the payload is written between `](` and `)`
    LinkEnd(String),
    /// Start of image (![)
    ImageStart,
    /// End of image with URL and optional title: `](url "title")`.
    /// An empty `title` means "no title".
    ImageEnd { url: String, title: String },
    /// Hard break from source (preserve as `  \n`)
    HardBreak,
    /// Soft break from source (a space, or a kept line break in preserve mode)
    SoftBreak,
}
91
/// Context for tracking where we are in the document.
///
/// One entry is pushed for every handled opening tag and popped again on the
/// matching closing tag.
#[derive(Debug, Clone, PartialEq)]
pub enum Context {
    /// Inside a paragraph
    Paragraph,
    /// Inside a heading of the given level (number of `#` characters)
    Heading { level: u32 },
    /// Inside a list; `item_count` is the number of items started so far
    List { ordered: bool, item_count: usize },
    /// Inside a single list item
    ListItem,
    /// Inside a block quote
    Blockquote,
    /// Inside a code block
    CodeBlock,
    /// Inside strong emphasis
    Strong,
    /// Inside emphasis
    Emphasis,
    /// Inside strikethrough
    Strikethrough,
    /// Inside a link; `url` is kept until the link is closed
    Link { url: String },
    /// Inside an image; `url` and `title` are kept until the image is closed
    Image { url: String, title: String },
}
107
/// Main formatter struct.
///
/// Consumes a stream of parser events and accumulates the formatted Markdown
/// in `output`. Inline content is buffered separately so it can be wrapped as
/// a unit when the enclosing block is flushed.
pub struct Formatter {
    /// Final output (accumulated across all processed events)
    output: String,
    /// Target line width, used when wrapping in `WrapMode::Always`
    line_width: usize,
    /// How to handle prose wrapping
    wrap_mode: WrapMode,
    /// How to handle ordered list numbering
    ordered_list_mode: OrderedListMode,
    /// Buffer for accumulating inline elements before wrapping
    inline_buffer: Vec<InlineElement>,
    /// Context stack for tracking nesting
    context_stack: Vec<Context>,
    /// Current list nesting depth (0 = not inside any list)
    list_depth: usize,
    /// Current blockquote nesting depth (0 = not inside any block quote)
    blockquote_depth: usize,
    /// Are we inside a code block?
    in_code_block: bool,
}
129
130impl Formatter {
131    /// Create a new formatter with the given line width and wrap mode
132    pub fn new(line_width: usize) -> Self {
133        Self::with_options(line_width, WrapMode::default(), OrderedListMode::default())
134    }
135
136    /// Create a new formatter with the given line width and wrap mode
137    pub fn with_wrap_mode(line_width: usize, wrap_mode: WrapMode) -> Self {
138        Self::with_options(line_width, wrap_mode, OrderedListMode::default())
139    }
140
141    /// Create a new formatter with all options
142    pub fn with_options(
143        line_width: usize,
144        wrap_mode: WrapMode,
145        ordered_list_mode: OrderedListMode,
146    ) -> Self {
147        Self {
148            output: String::new(),
149            line_width,
150            wrap_mode,
151            ordered_list_mode,
152            inline_buffer: Vec::new(),
153            context_stack: Vec::new(),
154            list_depth: 0,
155            blockquote_depth: 0,
156            in_code_block: false,
157        }
158    }
159
160    /// Format markdown from a list of events
161    pub fn format(&mut self, events: Vec<Event>) -> String {
162        for event in events {
163            self.process_event(event);
164        }
165
166        // Flush any remaining content
167        self.flush_inline_buffer();
168
169        // Ensure single trailing newline
170        let result = self.output.trim_end().to_string();
171        if result.is_empty() {
172            result
173        } else {
174            result + "\n"
175        }
176    }
177
178    fn process_event(&mut self, event: Event) {
179        match event {
180            Event::Start(tag) => self.handle_start_tag(tag),
181            Event::End(tag) => self.handle_end_tag(tag),
182            Event::Text(text) => self.handle_text(text),
183            Event::Code(code) => self.handle_inline_code(code),
184            Event::Html(html) => self.handle_html(html),
185            Event::SoftBreak => self.handle_soft_break(),
186            Event::HardBreak => self.handle_hard_break(),
187            Event::Rule => self.handle_rule(),
188            Event::FootnoteReference(_) => {}
189            Event::TaskListMarker(checked) => self.handle_task_list_marker(checked),
190        }
191    }
192
193    /// Get the prefix for the current line (blockquote markers)
194    fn get_line_prefix(&self) -> String {
195        let mut prefix = String::new();
196        for _ in 0..self.blockquote_depth {
197            prefix.push_str("> ");
198        }
199        prefix
200    }
201
202    /// Get the continuation indent for wrapped lines
203    fn get_continuation_indent(&self) -> String {
204        let mut indent = self.get_line_prefix();
205
206        // Add list indentation for continuation lines
207        if self.list_depth > 0 {
208            // Each list level needs indentation, plus space for the marker
209            indent.push_str(&"  ".repeat(self.list_depth));
210        }
211
212        indent
213    }
214
215    /// Convert inline buffer to a flat string (for wrapping), preserving structure
216    fn render_inline_buffer(&self) -> String {
217        let mut result = String::new();
218        for elem in &self.inline_buffer {
219            match elem {
220                InlineElement::Text(s) => result.push_str(s),
221                InlineElement::Code(s) => {
222                    result.push('`');
223                    result.push_str(s);
224                    result.push('`');
225                }
226                InlineElement::EmphasisStart => result.push('*'),
227                InlineElement::EmphasisEnd => result.push('*'),
228                InlineElement::StrongStart => result.push_str("**"),
229                InlineElement::StrongEnd => result.push_str("**"),
230                InlineElement::StrikethroughStart => result.push_str("~~"),
231                InlineElement::StrikethroughEnd => result.push_str("~~"),
232                InlineElement::LinkStart => result.push('['),
233                InlineElement::LinkEnd(url) => {
234                    result.push_str("](");
235                    result.push_str(url);
236                    result.push(')');
237                }
238                InlineElement::ImageStart => result.push_str("!["),
239                InlineElement::ImageEnd { url, title } => {
240                    result.push_str("](");
241                    result.push_str(url);
242                    if !title.is_empty() {
243                        result.push_str(" \"");
244                        result.push_str(title);
245                        result.push('"');
246                    }
247                    result.push(')');
248                }
249                InlineElement::HardBreak => result.push('\u{FFFF}'), // Placeholder for hard break
250                InlineElement::SoftBreak => {
251                    match self.wrap_mode {
252                        WrapMode::Preserve => result.push('\u{FFFE}'), // Placeholder for preserved line break
253                        WrapMode::Always | WrapMode::Never => result.push(' '),
254                    }
255                }
256            }
257        }
258        result
259    }
260
261    /// Wrap text to fit within line_width
262    /// Returns wrapped text with proper line prefixes
263    fn wrap_text(&self, text: &str, first_line_prefix: &str, continuation_prefix: &str) -> String {
264        let hard_break_placeholder = "\u{FFFF}";
265        let soft_break_placeholder = "\u{FFFE}";
266
267        match self.wrap_mode {
268            WrapMode::Preserve => {
269                // Preserve mode: keep line breaks as-is, just add prefixes
270                self.wrap_text_preserve(
271                    text,
272                    first_line_prefix,
273                    continuation_prefix,
274                    hard_break_placeholder,
275                    soft_break_placeholder,
276                )
277            }
278            WrapMode::Never => {
279                // Never mode: unwrap everything to single lines (per paragraph)
280                self.wrap_text_never(text, first_line_prefix, hard_break_placeholder)
281            }
282            WrapMode::Always => {
283                // Always mode: reflow text to fit width
284                self.wrap_text_always(
285                    text,
286                    first_line_prefix,
287                    continuation_prefix,
288                    hard_break_placeholder,
289                )
290            }
291        }
292    }
293
294    /// Preserve mode: keep original line breaks
295    fn wrap_text_preserve(
296        &self,
297        text: &str,
298        first_line_prefix: &str,
299        continuation_prefix: &str,
300        hard_break_placeholder: &str,
301        soft_break_placeholder: &str,
302    ) -> String {
303        let mut result = String::new();
304        let mut is_first_line = true;
305
306        // Split on both hard and soft break placeholders
307        // We need to track which type of break it was
308        let mut remaining = text;
309
310        while !remaining.is_empty() {
311            // Find the next break (either hard or soft)
312            let hard_pos = remaining.find(hard_break_placeholder);
313            let soft_pos = remaining.find(soft_break_placeholder);
314
315            let (segment, break_type, rest) = match (hard_pos, soft_pos) {
316                (Some(h), Some(s)) if h < s => {
317                    let (seg, rest) = remaining.split_at(h);
318                    (seg, Some("hard"), &rest[hard_break_placeholder.len()..])
319                }
320                (Some(h), Some(s)) if s < h => {
321                    let (seg, rest) = remaining.split_at(s);
322                    (seg, Some("soft"), &rest[soft_break_placeholder.len()..])
323                }
324                (Some(h), None) => {
325                    let (seg, rest) = remaining.split_at(h);
326                    (seg, Some("hard"), &rest[hard_break_placeholder.len()..])
327                }
328                (None, Some(s)) => {
329                    let (seg, rest) = remaining.split_at(s);
330                    (seg, Some("soft"), &rest[soft_break_placeholder.len()..])
331                }
332                (Some(h), Some(_)) => {
333                    // h == s, shouldn't happen, but handle it
334                    let (seg, rest) = remaining.split_at(h);
335                    (seg, Some("hard"), &rest[hard_break_placeholder.len()..])
336                }
337                (None, None) => (remaining, None, ""),
338            };
339
340            // Add the prefix
341            let prefix = if is_first_line {
342                first_line_prefix
343            } else {
344                continuation_prefix
345            };
346            result.push_str(prefix);
347
348            // Add the segment content (normalize internal whitespace but preserve words)
349            let words: Vec<&str> = segment.split_whitespace().collect();
350            result.push_str(&words.join(" "));
351
352            // Add the appropriate line ending
353            match break_type {
354                Some("hard") => {
355                    result.push_str("  \n");
356                }
357                Some("soft") => {
358                    result.push('\n');
359                }
360                None => {}
361                _ => {}
362            }
363
364            remaining = rest;
365            is_first_line = false;
366        }
367
368        result
369    }
370
371    /// Never mode: unwrap to single line
372    fn wrap_text_never(
373        &self,
374        text: &str,
375        first_line_prefix: &str,
376        hard_break_placeholder: &str,
377    ) -> String {
378        // Split on hard breaks - those we preserve
379        let segments: Vec<&str> = text.split(hard_break_placeholder).collect();
380        let mut result = String::new();
381
382        for (seg_idx, segment) in segments.iter().enumerate() {
383            let words: Vec<&str> = segment.split_whitespace().collect();
384
385            if seg_idx == 0 {
386                result.push_str(first_line_prefix);
387            }
388
389            result.push_str(&words.join(" "));
390
391            // Add hard break if not the last segment
392            if seg_idx < segments.len() - 1 {
393                result.push_str("  \n");
394                result.push_str(first_line_prefix);
395            }
396        }
397
398        result
399    }
400
401    /// Always mode: reflow text to fit width (original behavior)
402    fn wrap_text_always(
403        &self,
404        text: &str,
405        first_line_prefix: &str,
406        continuation_prefix: &str,
407        hard_break_placeholder: &str,
408    ) -> String {
409        // First, handle hard breaks by splitting on them
410        let segments: Vec<&str> = text.split(hard_break_placeholder).collect();
411
412        let mut result = String::new();
413
414        for (seg_idx, segment) in segments.iter().enumerate() {
415            // Normalize whitespace within this segment
416            let words: Vec<&str> = segment.split_whitespace().collect();
417
418            if words.is_empty() {
419                if seg_idx < segments.len() - 1 {
420                    // There was a hard break here, add it
421                    if !result.is_empty() {
422                        result.push_str("  \n");
423                        result.push_str(continuation_prefix);
424                    }
425                }
426                continue;
427            }
428
429            let prefix = if seg_idx == 0 && result.is_empty() {
430                first_line_prefix
431            } else {
432                continuation_prefix
433            };
434
435            let mut current_line = if result.is_empty() || result.ends_with('\n') {
436                prefix.to_string()
437            } else {
438                String::new()
439            };
440
441            let mut first_word_on_line = result.is_empty() || result.ends_with('\n');
442
443            for word in &words {
444                let space_needed = if first_word_on_line { 0 } else { 1 };
445                let would_be_length = current_line.len() + space_needed + word.len();
446
447                if !first_word_on_line && would_be_length > self.line_width {
448                    // Wrap to new line (use plain \n - NOT hard break)
449                    result.push_str(&current_line);
450                    result.push('\n');
451                    current_line = continuation_prefix.to_string();
452                    current_line.push_str(word);
453                    first_word_on_line = false;
454                } else {
455                    if !first_word_on_line {
456                        current_line.push(' ');
457                    }
458                    current_line.push_str(word);
459                    first_word_on_line = false;
460                }
461            }
462
463            result.push_str(&current_line);
464
465            // Add hard break if not the last segment
466            if seg_idx < segments.len() - 1 {
467                result.push_str("  \n");
468                result.push_str(continuation_prefix);
469            }
470        }
471
472        result
473    }
474
475    /// Flush the inline buffer, wrapping text appropriately
476    fn flush_inline_buffer(&mut self) {
477        if self.inline_buffer.is_empty() {
478            return;
479        }
480
481        let rendered = self.render_inline_buffer();
482
483        if rendered.trim().is_empty() {
484            self.inline_buffer.clear();
485            return;
486        }
487
488        let prefix = self.get_line_prefix();
489        let continuation = self.get_continuation_indent();
490
491        let wrapped = self.wrap_text(&rendered, &prefix, &continuation);
492        self.output.push_str(&wrapped);
493        self.inline_buffer.clear();
494    }
495
496    /// Ensure there's a blank line before the next block element
497    fn ensure_blank_line(&mut self) {
498        if self.output.is_empty() {
499            return;
500        }
501        if !self.output.ends_with("\n\n") {
502            if self.output.ends_with('\n') {
503                self.output.push('\n');
504            } else {
505                self.output.push_str("\n\n");
506            }
507        }
508    }
509
510    fn handle_start_tag(&mut self, tag: Tag) {
511        match tag {
512            Tag::Heading(level, _, _) => {
513                self.flush_inline_buffer();
514                self.ensure_blank_line();
515                let level_num = level as usize;
516                self.output.push_str(&"#".repeat(level_num));
517                self.output.push(' ');
518                self.context_stack.push(Context::Heading {
519                    level: level_num as u32,
520                });
521            }
522
523            Tag::Paragraph => {
524                self.flush_inline_buffer();
525                // Don't add blank line if we're directly inside a list item
526                // (list items implicitly contain paragraphs)
527                let in_list_item = self.context_stack.last() == Some(&Context::ListItem);
528                if !in_list_item {
529                    self.ensure_blank_line();
530                }
531                // Don't add prefix here - wrap_text will handle it
532                self.context_stack.push(Context::Paragraph);
533            }
534
535            Tag::List(first_item_number) => {
536                self.flush_inline_buffer();
537                // Only add blank line before top-level lists, not nested ones
538                // A nested list is one that starts while we're inside a ListItem
539                let in_list_item = self.context_stack.last() == Some(&Context::ListItem);
540                if !in_list_item {
541                    self.ensure_blank_line();
542                }
543                self.list_depth += 1;
544                self.context_stack.push(Context::List {
545                    ordered: first_item_number.is_some(),
546                    item_count: 0,
547                });
548            }
549
550            Tag::Item => {
551                self.flush_inline_buffer();
552                if !self.output.ends_with('\n') && !self.output.is_empty() {
553                    self.output.push('\n');
554                }
555
556                // Increment the item count for the current list
557                let (is_ordered, item_number) = self
558                    .context_stack
559                    .iter_mut()
560                    .rev()
561                    .find_map(|c| match c {
562                        Context::List {
563                            ordered,
564                            item_count,
565                        } => {
566                            *item_count += 1;
567                            Some((*ordered, *item_count))
568                        }
569                        _ => None,
570                    })
571                    .unwrap_or((false, 1));
572
573                // Add blockquote prefix
574                let prefix = self.get_line_prefix();
575                self.output.push_str(&prefix);
576
577                // Add list indentation (for nested lists)
578                if self.list_depth > 1 {
579                    self.output.push_str(&"  ".repeat(self.list_depth - 1));
580                }
581
582                // Add list marker
583                if is_ordered {
584                    match self.ordered_list_mode {
585                        OrderedListMode::One => self.output.push_str("1. "),
586                        OrderedListMode::Ascending => {
587                            self.output.push_str(&format!("{}. ", item_number));
588                        }
589                    }
590                } else {
591                    self.output.push_str("- ");
592                }
593
594                self.context_stack.push(Context::ListItem);
595            }
596
597            Tag::BlockQuote => {
598                self.flush_inline_buffer();
599                self.ensure_blank_line();
600                self.blockquote_depth += 1;
601                self.context_stack.push(Context::Blockquote);
602            }
603
604            Tag::CodeBlock(kind) => {
605                self.flush_inline_buffer();
606                self.ensure_blank_line();
607                self.in_code_block = true;
608
609                // Extract language if specified
610                let lang = match kind {
611                    pulldown_cmark::CodeBlockKind::Fenced(lang) if !lang.is_empty() => {
612                        lang.to_string()
613                    }
614                    _ => String::new(),
615                };
616
617                self.output.push_str("```");
618                self.output.push_str(&lang);
619                self.output.push('\n');
620                self.context_stack.push(Context::CodeBlock);
621            }
622
623            Tag::Strong => {
624                self.inline_buffer.push(InlineElement::StrongStart);
625                self.context_stack.push(Context::Strong);
626            }
627
628            Tag::Emphasis => {
629                self.inline_buffer.push(InlineElement::EmphasisStart);
630                self.context_stack.push(Context::Emphasis);
631            }
632
633            Tag::Strikethrough => {
634                self.inline_buffer.push(InlineElement::StrikethroughStart);
635                self.context_stack.push(Context::Strikethrough);
636            }
637
638            Tag::Link(_, url, _) => {
639                self.inline_buffer.push(InlineElement::LinkStart);
640                self.context_stack.push(Context::Link {
641                    url: url.to_string(),
642                });
643            }
644
645            Tag::Image(_, url, title) => {
646                self.inline_buffer.push(InlineElement::ImageStart);
647                self.context_stack.push(Context::Image {
648                    url: url.to_string(),
649                    title: title.to_string(),
650                });
651            }
652
653            _ => {}
654        }
655    }
656
657    fn handle_end_tag(&mut self, tag: Tag) {
658        match tag {
659            Tag::Heading { .. } => {
660                self.flush_inline_buffer();
661                self.output.push('\n');
662                self.context_stack.pop();
663            }
664
665            Tag::Paragraph => {
666                self.flush_inline_buffer();
667                self.output.push('\n');
668                self.context_stack.pop();
669            }
670
671            Tag::List(_) => {
672                self.flush_inline_buffer();
673                if !self.output.ends_with('\n') {
674                    self.output.push('\n');
675                }
676                self.list_depth = self.list_depth.saturating_sub(1);
677                self.context_stack.pop();
678            }
679
680            Tag::Item => {
681                self.flush_inline_buffer();
682                self.context_stack.pop();
683            }
684
685            Tag::BlockQuote => {
686                self.flush_inline_buffer();
687                if !self.output.ends_with('\n') {
688                    self.output.push('\n');
689                }
690                self.blockquote_depth = self.blockquote_depth.saturating_sub(1);
691                self.context_stack.pop();
692            }
693
694            Tag::CodeBlock(_) => {
695                self.output.push_str("```\n");
696                self.in_code_block = false;
697                self.context_stack.pop();
698            }
699
700            Tag::Strong => {
701                self.inline_buffer.push(InlineElement::StrongEnd);
702                self.context_stack.pop();
703            }
704
705            Tag::Emphasis => {
706                self.inline_buffer.push(InlineElement::EmphasisEnd);
707                self.context_stack.pop();
708            }
709
710            Tag::Strikethrough => {
711                self.inline_buffer.push(InlineElement::StrikethroughEnd);
712                self.context_stack.pop();
713            }
714
715            Tag::Link(_, _, _) => {
716                // Get the URL from context
717                if let Some(Context::Link { url }) = self.context_stack.pop() {
718                    self.inline_buffer.push(InlineElement::LinkEnd(url));
719                }
720            }
721
722            Tag::Image(_, _, _) => {
723                // Get the URL and title from context
724                if let Some(Context::Image { url, title }) = self.context_stack.pop() {
725                    self.inline_buffer
726                        .push(InlineElement::ImageEnd { url, title });
727                }
728            }
729
730            _ => {}
731        }
732    }
733
734    fn handle_text(&mut self, text: CowStr) {
735        if self.in_code_block {
736            // Code blocks: preserve exactly
737            self.output.push_str(&text);
738        } else {
739            // Regular text: add to inline buffer
740            self.inline_buffer
741                .push(InlineElement::Text(text.to_string()));
742        }
743    }
744
745    fn handle_inline_code(&mut self, code: CowStr) {
746        self.inline_buffer
747            .push(InlineElement::Code(code.to_string()));
748    }
749
750    fn handle_html(&mut self, html: CowStr) {
751        self.flush_inline_buffer();
752        self.ensure_blank_line();
753        self.output.push_str(&html);
754        if !html.ends_with('\n') {
755            self.output.push('\n');
756        }
757    }
758
759    fn handle_soft_break(&mut self) {
760        if !self.in_code_block {
761            // Soft break = space (will be normalized during flush)
762            self.inline_buffer.push(InlineElement::SoftBreak);
763        }
764    }
765
766    fn handle_hard_break(&mut self) {
767        // Hard break from source - preserve it!
768        self.inline_buffer.push(InlineElement::HardBreak);
769    }
770
771    fn handle_rule(&mut self) {
772        self.flush_inline_buffer();
773        self.ensure_blank_line();
774        self.output.push_str("---\n");
775    }
776
777    fn handle_task_list_marker(&mut self, checked: bool) {
778        if checked {
779            self.inline_buffer
780                .push(InlineElement::Text("[x] ".to_string()));
781        } else {
782            self.inline_buffer
783                .push(InlineElement::Text("[ ] ".to_string()));
784        }
785    }
786}