pulldown_cmark_to_flowed/
lib.rs1#![doc = concat!(include_str!("../tests/example.md"), "```")]
26#![doc = concat!(include_str!("../tests/example.txt"), "```")]
31use hashbrown::HashMap;
46use pulldown_cmark::{CowStr, Event, HeadingLevel, LinkType, Options, Tag, TagEnd};
47use std::{mem, ops::AddAssign};
48
/// Value for the `Content-Type` header that describes the text produced by
/// this crate: UTF-8 plain text using the `flowed` format.
pub const CONTENT_TYPE: &str = "text/plain; charset=\"utf-8\"; format=\"flowed\"";

/// Hard upper bound used by the wrapping logic: a break at a space beyond the
/// preferred width is only considered while it stays within this many columns.
const MAX_LINE_WIDTH: usize = 78;

/// Line width the wrapping logic aims for; text that fits within it is
/// emitted without looking for a break point.
const PREFERRED_LINE_WIDTH: usize = 66;
58
59pub fn parser_options() -> Options {
64 Options::ENABLE_FOOTNOTES
65 | Options::ENABLE_STRIKETHROUGH
66 | Options::ENABLE_SMART_PUNCTUATION
67 | Options::ENABLE_WIKILINKS
68}
69
70pub fn push_text<'a, I>(s: &mut String, iter: I)
79where
80 I: Iterator<Item = Event<'a>>
81{
82 let mut state = State::new(s);
83 push_text_to_state(&mut state, iter);
84}
85
86struct State<'a, 's> {
88 txt: &'s mut String,
90 trailing_newlines: u8,
93
94 indentations: Vec<&'static str>,
96
97 heading_len: usize,
99 heading_lvl: Option<HeadingLevel>,
101
102 code_block: bool,
104
105 html_blocks: u8,
107
108 lists: Vec<Option<u64>>,
111
112 footnotes: Vec<String>,
114 footnote_labels: HashMap<CowStr<'a>, usize>,
116 in_footnote: Option<usize>,
118 footnote_links: Vec<usize>
120}
121
122impl<'s> State<'_, 's> {
123 fn new(txt: &'s mut String) -> Self {
124 Self {
125 txt,
126 trailing_newlines: u8::MAX,
127
128 indentations: Vec::new(),
129
130 heading_len: 0,
131 heading_lvl: None,
132
133 code_block: false,
134
135 html_blocks: 0,
136
137 lists: Vec::new(),
138
139 footnotes: Vec::new(),
140 footnote_labels: HashMap::new(),
141 in_footnote: None,
142 footnote_links: Vec::new()
143 }
144 }
145}
146
147impl<T: AsRef<str>> AddAssign<T> for State<'_, '_> {
148 fn add_assign(&mut self, rhs: T) {
149 *self.txt += rhs.as_ref();
150 }
151}
152
153impl<'a> State<'a, '_> {
154 fn column(&self) -> usize {
156 let line_begin_idx = self.txt.rfind("\r\n").map(|idx| idx + 2).unwrap_or(0);
157 self.txt[line_begin_idx ..].chars().count() - 1
159 }
160
161 fn newline_impl(&mut self, space_stuffing: bool, quotes_only: bool) {
162 if let Some(mut idx) = self.txt.rfind("\r\n").map(|idx| idx + 2) {
164 while self.txt[idx ..].starts_with('>') {
165 idx += 1;
166 }
167 if self.txt[idx ..].chars().all(|ch| ch == ' ') {
168 self.txt.truncate(idx);
169 }
170 }
171
172 *self.txt += "\r\n";
173 if space_stuffing
174 && self
175 .indentations
176 .first()
177 .is_none_or(|indent| *indent != ">")
178 {
179 self.txt.push(' ');
180 }
181 self.trailing_newlines += 1;
182
183 for indent in &self.indentations {
185 if !quotes_only || *indent == ">" {
186 *self.txt += indent;
187 }
188 }
189
190 if space_stuffing && self.indentations.last().is_some_and(|i| *i == ">") {
193 self.txt.push(' ');
194 }
195 }
196
197 fn newlines(&mut self, count: u8) {
200 while self.trailing_newlines < count {
201 self.newline_impl(true, false);
204 }
205 }
206
207 fn add_text_unwrapped(&mut self, text: &str) {
208 if text.is_empty() {
209 return;
210 }
211
212 *self.txt += text;
213 self.trailing_newlines = 0;
214
215 if self.heading_lvl.is_some() {
216 let mut column = self.column();
217 if self.txt.ends_with(' ') {
218 column -= 1;
219 }
220 self.heading_len = self.heading_len.max(column);
221 }
222 }
223
224 fn add_text_wrapping(&mut self, text: &str) {
225 if text.is_empty() {
226 return;
227 }
228
229 let column = self.column();
230 let optimal_length = PREFERRED_LINE_WIDTH.saturating_sub(column);
231 let max_length = MAX_LINE_WIDTH.saturating_sub(column);
232
233 if text.len() <= optimal_length {
234 self.add_text_unwrapped(text);
235 return;
236 }
237
238 let mut space_before = None;
240 let mut space_after = None;
241 let mut space_after_within_max_length = false;
242 let mut byte = 0;
243 for (i, ch) in text.chars().enumerate() {
244 if ch == ' ' {
245 if i <= optimal_length {
246 space_before = Some(byte);
247 } else {
248 space_after = Some(byte);
249 space_after_within_max_length = i <= max_length;
250 break;
251 }
252 }
253 byte += ch.len_utf8();
254 }
255 let space = match (space_before, space_after) {
256 (Some(space_before), None) => space_before,
257 (None, Some(space_after)) if space_after_within_max_length => space_after,
258 (Some(space_before), Some(space_after)) => {
259 if !space_after_within_max_length {
260 space_before
262 } else if (optimal_length - space_before) <= space_after {
263 space_before
266 } else {
267 space_after
268 }
269 },
270 (None, _) if self.trailing_newlines == 0 && self.txt.ends_with(' ') => {
271 self.newline_impl(true, true);
272 self.add_text_wrapping(text);
273 return;
274 },
275 (None, Some(space_after)) => {
276 space_after
278 },
279 _ => {
280 self.add_text_unwrapped(text);
282 return;
283 }
284 };
285
286 let (before, after) = text.split_at(space + 1);
297 self.add_text_unwrapped(before);
298 self.newline_impl(true, true);
299 self.add_text_wrapping(after);
300 }
301
302 fn get_or_create_footnote(&mut self, label: CowStr<'a>) -> usize {
303 match self.footnote_labels.get(&label) {
304 Some(footnote_idx) => *footnote_idx,
305 None => {
306 let footnote_idx = self.footnotes.len();
307 self.footnotes.push(String::new());
308 self.footnote_labels.insert(label, footnote_idx);
309 footnote_idx
310 }
311 }
312 }
313}
314
315fn push_text_to_state<'a, I>(txt: &mut State<'a, '_>, iter: I)
317where
318 I: Iterator<Item = Event<'a>>
319{
320 if txt.txt.is_empty() || txt.txt.ends_with('\n') {
322 txt.txt.push(' ');
323 }
324
325 for event in iter {
327 match event {
328 Event::Start(Tag::Paragraph) => {
329 if let Some(footnote_idx) = txt.in_footnote {
330 let footnote_txt = &mut txt.footnotes[footnote_idx];
331 if !footnote_txt.is_empty() {
332 *footnote_txt += "\n\n";
333 }
334 } else {
335 txt.newlines(2);
336 }
337 },
338
339 Event::Start(Tag::Heading { level, .. }) => {
340 txt.newlines(3);
341 txt.heading_lvl = Some(level);
342 txt.heading_len = 0;
343 },
344
345 Event::Start(Tag::BlockQuote(_)) => {
346 txt.newlines(1);
349 txt.indentations.push(">");
350 },
351
352 Event::Start(Tag::CodeBlock(_)) => {
353 txt.newlines(2);
354 txt.code_block = true;
355 },
356
357 Event::Start(Tag::HtmlBlock) => {
358 txt.html_blocks += 1;
359 },
360
361 Event::Start(Tag::List(list_idx)) => {
362 txt.newlines(2);
363 txt.lists.push(list_idx);
364 },
365
366 Event::Start(Tag::Item) => {
367 txt.newlines(2);
368 let list_idx = txt
369 .lists
370 .last_mut()
371 .expect("Markdown parser found a list item outside of a list");
372 if let Some(list_idx) = list_idx {
373 let list_idx_str = format!("{list_idx}. ");
374 for _ in 0 .. 4usize.saturating_sub(list_idx_str.len()) {
375 txt.txt.push(' ');
376 }
377 *txt.txt += &list_idx_str;
378 *list_idx += 1;
379 } else {
380 *txt += " • ";
381 }
382 txt.indentations.push(" ");
383 },
384
385 Event::Start(Tag::FootnoteDefinition(label)) => {
386 let footnote_idx = txt.get_or_create_footnote(label);
387 txt.in_footnote = Some(footnote_idx);
388 },
389
390 Event::Start(Tag::DefinitionList)
391 | Event::Start(Tag::DefinitionListTitle)
392 | Event::Start(Tag::DefinitionListDefinition) => {
393 unreachable!("Definition lists are not enabled in the parser options")
394 },
395
396 Event::Start(Tag::Table(_))
397 | Event::Start(Tag::TableHead)
398 | Event::Start(Tag::TableRow)
399 | Event::Start(Tag::TableCell) => {
400 unreachable!("Tables are not enabled in the parser options")
401 },
402
403 Event::Start(Tag::Emphasis)
404 | Event::Start(Tag::Strong)
405 | Event::Start(Tag::Strikethrough) => {
406 },
408
409 Event::Start(Tag::Superscript) | Event::Start(Tag::Subscript) => {
410 unreachable!("Super/Subscript are not enabled in the parser options")
411 },
412
413 Event::Start(Tag::Link {
414 link_type: LinkType::Autolink,
415 ..
416 }) => {
417 txt.footnote_links.push(0);
420 },
421
422 Event::Start(Tag::Link { dest_url, .. })
423 | Event::Start(Tag::Image { dest_url, .. }) => {
424 txt.footnotes.push(dest_url.into_string());
425 txt.footnote_links.push(txt.footnotes.len());
426 },
427
428 Event::Start(Tag::MetadataBlock(_)) => {
429 unreachable!("Metadata blacks are not enabled in the parser options")
430 },
431
432 Event::End(TagEnd::Paragraph) => {
433 },
435
436 Event::End(TagEnd::Heading(level)) => {
437 txt.newlines(1);
438 let ch = match level {
439 HeadingLevel::H1 => '=',
440 _ => '-'
441 };
442 for _ in 0 .. txt.heading_len {
443 txt.txt.push(ch);
444 }
445 txt.trailing_newlines = 0;
446 txt.newlines(2);
447 txt.heading_lvl = None;
448 },
449
450 Event::End(TagEnd::BlockQuote(_)) => {
451 let indent = txt.indentations.pop();
452 debug_assert_eq!(indent, Some(">"));
453 },
454
455 Event::End(TagEnd::CodeBlock) => {
456 debug_assert!(txt.code_block);
457 txt.code_block = false;
458 },
459
460 Event::End(TagEnd::HtmlBlock) => {
461 txt.html_blocks -= 1;
462 },
463
464 Event::End(TagEnd::List(_)) => {
465 let list_idx = txt.lists.pop();
466 debug_assert!(list_idx.is_some());
467 },
468
469 Event::End(TagEnd::Item) => {
470 let indent = txt.indentations.pop();
471 debug_assert_eq!(indent, Some(" "));
472 },
473
474 Event::End(TagEnd::FootnoteDefinition) => {
475 txt.in_footnote = None;
476 },
477
478 Event::End(TagEnd::DefinitionList)
479 | Event::End(TagEnd::DefinitionListTitle)
480 | Event::End(TagEnd::DefinitionListDefinition) => {
481 unreachable!("Definition lists are not enabled in the parser options")
482 },
483
484 Event::End(TagEnd::Table)
485 | Event::End(TagEnd::TableHead)
486 | Event::End(TagEnd::TableRow)
487 | Event::End(TagEnd::TableCell) => {
488 unreachable!("Tables are not enabled in the parser options")
489 },
490
491 Event::End(TagEnd::Emphasis)
492 | Event::End(TagEnd::Strong)
493 | Event::End(TagEnd::Strikethrough) => {
494 },
496
497 Event::End(TagEnd::Superscript) | Event::End(TagEnd::Subscript) => {
498 unreachable!("Super/Subscript are not enabled in the parser options")
499 },
500
501 Event::End(TagEnd::Link) | Event::End(TagEnd::Image) => {
502 let footnote_idx = txt
503 .footnote_links
504 .pop()
505 .expect("Markdown parser found a closing link/image that isn't open");
506 if footnote_idx != 0 {
507 txt.add_text_wrapping(&format!(" [{footnote_idx}]"));
508 }
509 },
510
511 Event::End(TagEnd::MetadataBlock(_)) => {
512 unreachable!("Metadata blocks are not enabled in the parser options")
513 },
514
515 Event::Text(text) | Event::Code(text) => {
517 if let Some(footnote_idx) = txt.in_footnote {
521 txt.footnotes[footnote_idx] += &text;
522 }
523 else if txt.code_block {
525 for line in text.lines() {
526 *txt += line.trim_end_matches(' ');
528 txt.trailing_newlines = 0;
529 txt.newlines(1);
530 }
531 } else {
532 txt.add_text_wrapping(&text);
533 }
534 },
535
536 Event::InlineMath(_) | Event::DisplayMath(_) => {
537 unreachable!("Math is not enabled in the parser options")
538 },
539
540 Event::Html(_) | Event::InlineHtml(_) => {
541 },
543
544 Event::FootnoteReference(label) => {
545 let footnote_idx = txt.get_or_create_footnote(label);
546 txt.add_text_wrapping(&format!("[{footnote_idx}]"));
547 },
548
549 Event::SoftBreak => {
550 if let Some(footnote_idx) = txt.in_footnote {
552 txt.footnotes[footnote_idx].push(' ');
553 } else {
554 txt.add_text_wrapping(" ");
555 }
556 },
557
558 Event::HardBreak => {
559 if let Some(footnote_idx) = txt.in_footnote {
560 txt.footnotes[footnote_idx] += "\n";
561 } else {
562 while txt.txt.ends_with(' ') {
565 txt.txt.pop();
566 }
567 txt.trailing_newlines = 0;
569 txt.newlines(1);
570 }
571 },
572
573 Event::Rule => {
574 txt.newlines(1);
575 for _ in 0 .. MAX_LINE_WIDTH {
576 txt.add_text_unwrapped("-");
577 }
578 txt.newlines(1);
579 },
580
581 Event::TaskListMarker(_) => {
582 unreachable!("Task lists are not enabled in the parser options")
583 }
584 }
585 }
586
587 if !txt.footnotes.is_empty() {
589 txt.newlines(1);
590 txt.newline_impl(false, true);
592 debug_assert!(txt.txt.ends_with("\r\n"));
593 txt.add_text_unwrapped("-- ");
594 for (i, f) in mem::take(&mut txt.footnotes).into_iter().enumerate() {
595 let multiline = f.contains('\n');
596 txt.newlines(1);
597 if multiline {
598 txt.newlines(2);
599 }
600 let f_label = format!("[{}]: ", i + 1);
601 for _ in 0 .. 6usize.saturating_sub(f_label.len()) {
602 txt.txt.push(' ');
603 }
604 *txt += &f_label;
605 txt.indentations.push(" ");
606 for line in f.lines() {
607 txt.newlines(1);
608 txt.add_text_wrapping(line);
609 txt.trailing_newlines = 0;
613 }
614 txt.indentations.pop();
615 if multiline {
616 txt.newlines(2);
617 }
618 }
619 }
620
621 txt.newline_impl(false, true);
623}