md_formatter/
formatter.rs

1use pulldown_cmark::{CowStr, Event, Tag};
2use std::str::FromStr;
3
/// Strategy applied to line breaks inside prose.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum WrapMode {
    /// Reflow prose so lines stay within the print width.
    Always,
    /// Join each block of prose into a single line.
    Never,
    /// Leave the prose line breaks untouched (this is the default).
    #[default]
    Preserve,
}

impl FromStr for WrapMode {
    type Err = String;

    /// Parse `always`, `never` or `preserve`, ignoring letter case.
    ///
    /// # Errors
    ///
    /// Any other input produces a message that quotes the input and lists the
    /// accepted values.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        const MODES: [(&str, WrapMode); 3] = [
            ("always", WrapMode::Always),
            ("never", WrapMode::Never),
            ("preserve", WrapMode::Preserve),
        ];

        MODES
            .iter()
            .find(|(name, _)| s.eq_ignore_ascii_case(name))
            .map(|&(_, mode)| mode)
            .ok_or_else(|| {
                format!(
                    "Invalid wrap mode: '{}'. Expected: always, never, preserve",
                    s
                )
            })
    }
}
31
/// One piece of inline content, queued until the surrounding block is flushed
/// and wrapped.
#[derive(Debug, Clone)]
enum InlineElement {
    /// Plain text.
    Text(String),
    /// Contents of an inline code span, without the surrounding backticks.
    Code(String),
    /// Opening emphasis marker (`*`).
    EmphasisStart,
    /// Closing emphasis marker (`*`).
    EmphasisEnd,
    /// Opening strong marker (`**`).
    StrongStart,
    /// Closing strong marker (`**`).
    StrongEnd,
    /// Opening strikethrough marker (`~~`).
    StrikethroughStart,
    /// Closing strikethrough marker (`~~`).
    StrikethroughEnd,
    /// Opening bracket of a link (`[`).
    LinkStart,
    /// Closing part of a link, `](url)`; carries the URL.
    LinkEnd(String),
    /// Opening of an image (`![`).
    ImageStart,
    /// Closing part of an image, `](url "title")`; the title may be empty.
    ImageEnd { url: String, title: String },
    /// Hard line break taken from the source (written back as `  \n`).
    HardBreak,
    /// Soft line break taken from the source.
    SoftBreak,
}
64
/// A construct that is currently open while walking the document.
///
/// Values are kept on a stack so the formatter knows what it is nested in.
#[derive(Debug, Clone, PartialEq)]
pub enum Context {
    /// Inside a paragraph.
    Paragraph,
    /// Inside a heading of the given level.
    Heading { level: u32 },
    /// Inside a list; `ordered` tells numbered lists from bullet lists.
    List { ordered: bool },
    /// Inside a single list item.
    ListItem,
    /// Inside a blockquote.
    Blockquote,
    /// Inside a code block.
    CodeBlock,
    /// Inside strong text.
    Strong,
    /// Inside emphasised text.
    Emphasis,
    /// Inside struck-through text.
    Strikethrough,
    /// Inside a link; the URL is kept until the link is closed.
    Link { url: String },
    /// Inside an image; URL and title are kept until the image is closed.
    Image { url: String, title: String },
}
80
81/// Main formatter struct
82pub struct Formatter {
83    /// Final output
84    output: String,
85    /// Target line width
86    line_width: usize,
87    /// How to handle prose wrapping
88    wrap_mode: WrapMode,
89    /// Buffer for accumulating inline elements before wrapping
90    inline_buffer: Vec<InlineElement>,
91    /// Context stack for tracking nesting
92    context_stack: Vec<Context>,
93    /// Current list nesting depth
94    list_depth: usize,
95    /// Current blockquote nesting depth
96    blockquote_depth: usize,
97    /// Are we inside a code block?
98    in_code_block: bool,
99}
100
101impl Formatter {
102    /// Create a new formatter with the given line width and wrap mode
103    pub fn new(line_width: usize) -> Self {
104        Self::with_wrap_mode(line_width, WrapMode::default())
105    }
106
107    /// Create a new formatter with the given line width and wrap mode
108    pub fn with_wrap_mode(line_width: usize, wrap_mode: WrapMode) -> Self {
109        Self {
110            output: String::new(),
111            line_width,
112            wrap_mode,
113            inline_buffer: Vec::new(),
114            context_stack: Vec::new(),
115            list_depth: 0,
116            blockquote_depth: 0,
117            in_code_block: false,
118        }
119    }
120
121    /// Format markdown from a list of events
122    pub fn format(&mut self, events: Vec<Event>) -> String {
123        for event in events {
124            self.process_event(event);
125        }
126
127        // Flush any remaining content
128        self.flush_inline_buffer();
129
130        // Ensure single trailing newline
131        let result = self.output.trim_end().to_string();
132        if result.is_empty() {
133            result
134        } else {
135            result + "\n"
136        }
137    }
138
139    fn process_event(&mut self, event: Event) {
140        match event {
141            Event::Start(tag) => self.handle_start_tag(tag),
142            Event::End(tag) => self.handle_end_tag(tag),
143            Event::Text(text) => self.handle_text(text),
144            Event::Code(code) => self.handle_inline_code(code),
145            Event::Html(html) => self.handle_html(html),
146            Event::SoftBreak => self.handle_soft_break(),
147            Event::HardBreak => self.handle_hard_break(),
148            Event::Rule => self.handle_rule(),
149            Event::FootnoteReference(_) => {}
150            Event::TaskListMarker(checked) => self.handle_task_list_marker(checked),
151        }
152    }
153
154    /// Get the prefix for the current line (blockquote markers)
155    fn get_line_prefix(&self) -> String {
156        let mut prefix = String::new();
157        for _ in 0..self.blockquote_depth {
158            prefix.push_str("> ");
159        }
160        prefix
161    }
162
163    /// Get the continuation indent for wrapped lines
164    fn get_continuation_indent(&self) -> String {
165        let mut indent = self.get_line_prefix();
166
167        // Add list indentation for continuation lines
168        if self.list_depth > 0 {
169            // Each list level needs indentation, plus space for the marker
170            indent.push_str(&"  ".repeat(self.list_depth));
171        }
172
173        indent
174    }
175
176    /// Convert inline buffer to a flat string (for wrapping), preserving structure
177    fn render_inline_buffer(&self) -> String {
178        let mut result = String::new();
179        for elem in &self.inline_buffer {
180            match elem {
181                InlineElement::Text(s) => result.push_str(s),
182                InlineElement::Code(s) => {
183                    result.push('`');
184                    result.push_str(s);
185                    result.push('`');
186                }
187                InlineElement::EmphasisStart => result.push('*'),
188                InlineElement::EmphasisEnd => result.push('*'),
189                InlineElement::StrongStart => result.push_str("**"),
190                InlineElement::StrongEnd => result.push_str("**"),
191                InlineElement::StrikethroughStart => result.push_str("~~"),
192                InlineElement::StrikethroughEnd => result.push_str("~~"),
193                InlineElement::LinkStart => result.push('['),
194                InlineElement::LinkEnd(url) => {
195                    result.push_str("](");
196                    result.push_str(url);
197                    result.push(')');
198                }
199                InlineElement::ImageStart => result.push_str("!["),
200                InlineElement::ImageEnd { url, title } => {
201                    result.push_str("](");
202                    result.push_str(url);
203                    if !title.is_empty() {
204                        result.push_str(" \"");
205                        result.push_str(title);
206                        result.push('"');
207                    }
208                    result.push(')');
209                }
210                InlineElement::HardBreak => result.push('\u{FFFF}'), // Placeholder for hard break
211                InlineElement::SoftBreak => {
212                    match self.wrap_mode {
213                        WrapMode::Preserve => result.push('\u{FFFE}'), // Placeholder for preserved line break
214                        WrapMode::Always | WrapMode::Never => result.push(' '),
215                    }
216                }
217            }
218        }
219        result
220    }
221
222    /// Wrap text to fit within line_width
223    /// Returns wrapped text with proper line prefixes
224    fn wrap_text(&self, text: &str, first_line_prefix: &str, continuation_prefix: &str) -> String {
225        let hard_break_placeholder = "\u{FFFF}";
226        let soft_break_placeholder = "\u{FFFE}";
227
228        match self.wrap_mode {
229            WrapMode::Preserve => {
230                // Preserve mode: keep line breaks as-is, just add prefixes
231                self.wrap_text_preserve(
232                    text,
233                    first_line_prefix,
234                    continuation_prefix,
235                    hard_break_placeholder,
236                    soft_break_placeholder,
237                )
238            }
239            WrapMode::Never => {
240                // Never mode: unwrap everything to single lines (per paragraph)
241                self.wrap_text_never(text, first_line_prefix, hard_break_placeholder)
242            }
243            WrapMode::Always => {
244                // Always mode: reflow text to fit width
245                self.wrap_text_always(
246                    text,
247                    first_line_prefix,
248                    continuation_prefix,
249                    hard_break_placeholder,
250                )
251            }
252        }
253    }
254
255    /// Preserve mode: keep original line breaks
256    fn wrap_text_preserve(
257        &self,
258        text: &str,
259        first_line_prefix: &str,
260        continuation_prefix: &str,
261        hard_break_placeholder: &str,
262        soft_break_placeholder: &str,
263    ) -> String {
264        let mut result = String::new();
265        let mut is_first_line = true;
266
267        // Split on both hard and soft break placeholders
268        // We need to track which type of break it was
269        let mut remaining = text;
270
271        while !remaining.is_empty() {
272            // Find the next break (either hard or soft)
273            let hard_pos = remaining.find(hard_break_placeholder);
274            let soft_pos = remaining.find(soft_break_placeholder);
275
276            let (segment, break_type, rest) = match (hard_pos, soft_pos) {
277                (Some(h), Some(s)) if h < s => {
278                    let (seg, rest) = remaining.split_at(h);
279                    (seg, Some("hard"), &rest[hard_break_placeholder.len()..])
280                }
281                (Some(h), Some(s)) if s < h => {
282                    let (seg, rest) = remaining.split_at(s);
283                    (seg, Some("soft"), &rest[soft_break_placeholder.len()..])
284                }
285                (Some(h), None) => {
286                    let (seg, rest) = remaining.split_at(h);
287                    (seg, Some("hard"), &rest[hard_break_placeholder.len()..])
288                }
289                (None, Some(s)) => {
290                    let (seg, rest) = remaining.split_at(s);
291                    (seg, Some("soft"), &rest[soft_break_placeholder.len()..])
292                }
293                (Some(h), Some(_)) => {
294                    // h == s, shouldn't happen, but handle it
295                    let (seg, rest) = remaining.split_at(h);
296                    (seg, Some("hard"), &rest[hard_break_placeholder.len()..])
297                }
298                (None, None) => (remaining, None, ""),
299            };
300
301            // Add the prefix
302            let prefix = if is_first_line {
303                first_line_prefix
304            } else {
305                continuation_prefix
306            };
307            result.push_str(prefix);
308
309            // Add the segment content (normalize internal whitespace but preserve words)
310            let words: Vec<&str> = segment.split_whitespace().collect();
311            result.push_str(&words.join(" "));
312
313            // Add the appropriate line ending
314            match break_type {
315                Some("hard") => {
316                    result.push_str("  \n");
317                }
318                Some("soft") => {
319                    result.push('\n');
320                }
321                None => {}
322                _ => {}
323            }
324
325            remaining = rest;
326            is_first_line = false;
327        }
328
329        result
330    }
331
332    /// Never mode: unwrap to single line
333    fn wrap_text_never(
334        &self,
335        text: &str,
336        first_line_prefix: &str,
337        hard_break_placeholder: &str,
338    ) -> String {
339        // Split on hard breaks - those we preserve
340        let segments: Vec<&str> = text.split(hard_break_placeholder).collect();
341        let mut result = String::new();
342
343        for (seg_idx, segment) in segments.iter().enumerate() {
344            let words: Vec<&str> = segment.split_whitespace().collect();
345
346            if seg_idx == 0 {
347                result.push_str(first_line_prefix);
348            }
349
350            result.push_str(&words.join(" "));
351
352            // Add hard break if not the last segment
353            if seg_idx < segments.len() - 1 {
354                result.push_str("  \n");
355                result.push_str(first_line_prefix);
356            }
357        }
358
359        result
360    }
361
362    /// Always mode: reflow text to fit width (original behavior)
363    fn wrap_text_always(
364        &self,
365        text: &str,
366        first_line_prefix: &str,
367        continuation_prefix: &str,
368        hard_break_placeholder: &str,
369    ) -> String {
370        // First, handle hard breaks by splitting on them
371        let segments: Vec<&str> = text.split(hard_break_placeholder).collect();
372
373        let mut result = String::new();
374
375        for (seg_idx, segment) in segments.iter().enumerate() {
376            // Normalize whitespace within this segment
377            let words: Vec<&str> = segment.split_whitespace().collect();
378
379            if words.is_empty() {
380                if seg_idx < segments.len() - 1 {
381                    // There was a hard break here, add it
382                    if !result.is_empty() {
383                        result.push_str("  \n");
384                        result.push_str(continuation_prefix);
385                    }
386                }
387                continue;
388            }
389
390            let prefix = if seg_idx == 0 && result.is_empty() {
391                first_line_prefix
392            } else {
393                continuation_prefix
394            };
395
396            let mut current_line = if result.is_empty() || result.ends_with('\n') {
397                prefix.to_string()
398            } else {
399                String::new()
400            };
401
402            let mut first_word_on_line = result.is_empty() || result.ends_with('\n');
403
404            for word in &words {
405                let space_needed = if first_word_on_line { 0 } else { 1 };
406                let would_be_length = current_line.len() + space_needed + word.len();
407
408                if !first_word_on_line && would_be_length > self.line_width {
409                    // Wrap to new line (use plain \n - NOT hard break)
410                    result.push_str(&current_line);
411                    result.push('\n');
412                    current_line = continuation_prefix.to_string();
413                    current_line.push_str(word);
414                    first_word_on_line = false;
415                } else {
416                    if !first_word_on_line {
417                        current_line.push(' ');
418                    }
419                    current_line.push_str(word);
420                    first_word_on_line = false;
421                }
422            }
423
424            result.push_str(&current_line);
425
426            // Add hard break if not the last segment
427            if seg_idx < segments.len() - 1 {
428                result.push_str("  \n");
429                result.push_str(continuation_prefix);
430            }
431        }
432
433        result
434    }
435
436    /// Flush the inline buffer, wrapping text appropriately
437    fn flush_inline_buffer(&mut self) {
438        if self.inline_buffer.is_empty() {
439            return;
440        }
441
442        let rendered = self.render_inline_buffer();
443
444        if rendered.trim().is_empty() {
445            self.inline_buffer.clear();
446            return;
447        }
448
449        let prefix = self.get_line_prefix();
450        let continuation = self.get_continuation_indent();
451
452        let wrapped = self.wrap_text(&rendered, &prefix, &continuation);
453        self.output.push_str(&wrapped);
454        self.inline_buffer.clear();
455    }
456
457    /// Ensure there's a blank line before the next block element
458    fn ensure_blank_line(&mut self) {
459        if self.output.is_empty() {
460            return;
461        }
462        if !self.output.ends_with("\n\n") {
463            if self.output.ends_with('\n') {
464                self.output.push('\n');
465            } else {
466                self.output.push_str("\n\n");
467            }
468        }
469    }
470
471    fn handle_start_tag(&mut self, tag: Tag) {
472        match tag {
473            Tag::Heading(level, _, _) => {
474                self.flush_inline_buffer();
475                self.ensure_blank_line();
476                let level_num = level as usize;
477                self.output.push_str(&"#".repeat(level_num));
478                self.output.push(' ');
479                self.context_stack.push(Context::Heading {
480                    level: level_num as u32,
481                });
482            }
483
484            Tag::Paragraph => {
485                self.flush_inline_buffer();
486                // Don't add blank line if we're directly inside a list item
487                // (list items implicitly contain paragraphs)
488                let in_list_item = self.context_stack.last() == Some(&Context::ListItem);
489                if !in_list_item {
490                    self.ensure_blank_line();
491                }
492                // Don't add prefix here - wrap_text will handle it
493                self.context_stack.push(Context::Paragraph);
494            }
495
496            Tag::List(first_item_number) => {
497                self.flush_inline_buffer();
498                self.ensure_blank_line();
499                self.list_depth += 1;
500                self.context_stack.push(Context::List {
501                    ordered: first_item_number.is_some(),
502                });
503            }
504
505            Tag::Item => {
506                self.flush_inline_buffer();
507                if !self.output.ends_with('\n') && !self.output.is_empty() {
508                    self.output.push('\n');
509                }
510
511                // Add blockquote prefix
512                let prefix = self.get_line_prefix();
513                self.output.push_str(&prefix);
514
515                // Add list indentation (for nested lists)
516                if self.list_depth > 1 {
517                    self.output.push_str(&"  ".repeat(self.list_depth - 1));
518                }
519
520                // Add list marker
521                let is_ordered = self
522                    .context_stack
523                    .iter()
524                    .rev()
525                    .find_map(|c| match c {
526                        Context::List { ordered, .. } => Some(*ordered),
527                        _ => None,
528                    })
529                    .unwrap_or(false);
530
531                if is_ordered {
532                    self.output.push_str("1. ");
533                } else {
534                    self.output.push_str("- ");
535                }
536
537                self.context_stack.push(Context::ListItem);
538            }
539
540            Tag::BlockQuote => {
541                self.flush_inline_buffer();
542                self.ensure_blank_line();
543                self.blockquote_depth += 1;
544                self.context_stack.push(Context::Blockquote);
545            }
546
547            Tag::CodeBlock(kind) => {
548                self.flush_inline_buffer();
549                self.ensure_blank_line();
550                self.in_code_block = true;
551
552                // Extract language if specified
553                let lang = match kind {
554                    pulldown_cmark::CodeBlockKind::Fenced(lang) if !lang.is_empty() => {
555                        lang.to_string()
556                    }
557                    _ => String::new(),
558                };
559
560                self.output.push_str("```");
561                self.output.push_str(&lang);
562                self.output.push('\n');
563                self.context_stack.push(Context::CodeBlock);
564            }
565
566            Tag::Strong => {
567                self.inline_buffer.push(InlineElement::StrongStart);
568                self.context_stack.push(Context::Strong);
569            }
570
571            Tag::Emphasis => {
572                self.inline_buffer.push(InlineElement::EmphasisStart);
573                self.context_stack.push(Context::Emphasis);
574            }
575
576            Tag::Strikethrough => {
577                self.inline_buffer.push(InlineElement::StrikethroughStart);
578                self.context_stack.push(Context::Strikethrough);
579            }
580
581            Tag::Link(_, url, _) => {
582                self.inline_buffer.push(InlineElement::LinkStart);
583                self.context_stack.push(Context::Link {
584                    url: url.to_string(),
585                });
586            }
587
588            Tag::Image(_, url, title) => {
589                self.inline_buffer.push(InlineElement::ImageStart);
590                self.context_stack.push(Context::Image {
591                    url: url.to_string(),
592                    title: title.to_string(),
593                });
594            }
595
596            _ => {}
597        }
598    }
599
600    fn handle_end_tag(&mut self, tag: Tag) {
601        match tag {
602            Tag::Heading { .. } => {
603                self.flush_inline_buffer();
604                self.output.push('\n');
605                self.context_stack.pop();
606            }
607
608            Tag::Paragraph => {
609                self.flush_inline_buffer();
610                self.output.push('\n');
611                self.context_stack.pop();
612            }
613
614            Tag::List(_) => {
615                self.flush_inline_buffer();
616                if !self.output.ends_with('\n') {
617                    self.output.push('\n');
618                }
619                self.list_depth = self.list_depth.saturating_sub(1);
620                self.context_stack.pop();
621            }
622
623            Tag::Item => {
624                self.flush_inline_buffer();
625                self.context_stack.pop();
626            }
627
628            Tag::BlockQuote => {
629                self.flush_inline_buffer();
630                if !self.output.ends_with('\n') {
631                    self.output.push('\n');
632                }
633                self.blockquote_depth = self.blockquote_depth.saturating_sub(1);
634                self.context_stack.pop();
635            }
636
637            Tag::CodeBlock(_) => {
638                self.output.push_str("```\n");
639                self.in_code_block = false;
640                self.context_stack.pop();
641            }
642
643            Tag::Strong => {
644                self.inline_buffer.push(InlineElement::StrongEnd);
645                self.context_stack.pop();
646            }
647
648            Tag::Emphasis => {
649                self.inline_buffer.push(InlineElement::EmphasisEnd);
650                self.context_stack.pop();
651            }
652
653            Tag::Strikethrough => {
654                self.inline_buffer.push(InlineElement::StrikethroughEnd);
655                self.context_stack.pop();
656            }
657
658            Tag::Link(_, _, _) => {
659                // Get the URL from context
660                if let Some(Context::Link { url }) = self.context_stack.pop() {
661                    self.inline_buffer.push(InlineElement::LinkEnd(url));
662                }
663            }
664
665            Tag::Image(_, _, _) => {
666                // Get the URL and title from context
667                if let Some(Context::Image { url, title }) = self.context_stack.pop() {
668                    self.inline_buffer
669                        .push(InlineElement::ImageEnd { url, title });
670                }
671            }
672
673            _ => {}
674        }
675    }
676
677    fn handle_text(&mut self, text: CowStr) {
678        if self.in_code_block {
679            // Code blocks: preserve exactly
680            self.output.push_str(&text);
681        } else {
682            // Regular text: add to inline buffer
683            self.inline_buffer
684                .push(InlineElement::Text(text.to_string()));
685        }
686    }
687
688    fn handle_inline_code(&mut self, code: CowStr) {
689        self.inline_buffer
690            .push(InlineElement::Code(code.to_string()));
691    }
692
693    fn handle_html(&mut self, html: CowStr) {
694        self.flush_inline_buffer();
695        self.ensure_blank_line();
696        self.output.push_str(&html);
697        if !html.ends_with('\n') {
698            self.output.push('\n');
699        }
700    }
701
702    fn handle_soft_break(&mut self) {
703        if !self.in_code_block {
704            // Soft break = space (will be normalized during flush)
705            self.inline_buffer.push(InlineElement::SoftBreak);
706        }
707    }
708
709    fn handle_hard_break(&mut self) {
710        // Hard break from source - preserve it!
711        self.inline_buffer.push(InlineElement::HardBreak);
712    }
713
714    fn handle_rule(&mut self) {
715        self.flush_inline_buffer();
716        self.ensure_blank_line();
717        self.output.push_str("---\n");
718    }
719
720    fn handle_task_list_marker(&mut self, checked: bool) {
721        if checked {
722            self.inline_buffer
723                .push(InlineElement::Text("[x] ".to_string()));
724        } else {
725            self.inline_buffer
726                .push(InlineElement::Text("[ ] ".to_string()));
727        }
728    }
729}