extended_pulldown/
lib.rs

1//! This crate extends `pulldown_cmark` to do the following:
2//!
3//!  - smarten quotes according to a more complex but substantially slower algorithm
4//!   than that used in `pulldown_cmark` versions greater than 8.0
//!  - substitute unicode en-dashes, em-dashes and ellipses for `--`, `---` and `...`.
6//!  - allow multiple-paragraph footnotes by interpreting an indented and unlabelled code block
7//!    within a footnote as text to be parsed again.
8//!  - allow several new tags:
9//!        
10//!    * Sans
11//!    * Centred
12//!    * Right-aligned
13//!    * Small caps
14//!    * Subscript
15//!    * Superscript
16//!
17//! It also provides a function, `flatten_footnotes`,
18//! which replaces footnote references and definitions with a
19//! single group of tagged text; this allows
20//! rendering to targets like LaTeX which need a footnote to be
21//! defined at the point to which it refers. It inserts empty footnotes where
22//! a definition is missing.
23//!
24//! In general, this crate mimics the structs and methods of `pulldown_cmark`.
25//! However its more complex conception of markdown comes at the cost
26//! of much slower parsing.
//! It is therefore not recommended as a replacement for `pulldown_cmark`
28//! except where this complexity is required.
29//!
30//! The markdown syntax to use is otherwise essentially that of CommonMark
//! together with `pulldown_cmark`'s extensions.
32//!
33//!
34//! # Examples
35//!
36//! ## Inline Spans
37//!
38//! These are parsed preferentially from html spans:
39//!
40//! ```
41//! use extended_pulldown::Parser;
42//! use extended_pulldown::Event::*;
43//! use extended_pulldown::Tag::*;
44//!
45//! let text = concat!(r#"<span class="sans">Sans text</span>"#,
46//! r#"<span class="centred">Centred text</span>"#,
47//! r#"<span class="right-aligned">Right-aligned text</span>"#,
48//! r#"<span class="smallcaps">Small caps text</span>"#,
49//! r#"<span class="subscript">Subscript text</span>"#,
50//! r#"<span class="superscript">Superscript text</span>"#);
51//!    
52//! let parsed = Parser::new(text)
53//!     .collect::<Vec<_>>();
54//! let expected = vec![
55//!     Start(Paragraph),
56//!     Start(Sans),
57//!     Text("Sans text".into()),
58//!     End(Sans),
59//!     Start(Centred),
60//!     Text("Centred text".into()),
61//!     End(Centred),
62//!     Start(RightAligned),
63//!     Text("Right-aligned text".into()),
64//!     End(RightAligned),
65//!     Start(SmallCaps),
66//!     Text("Small caps text".into()),
67//!     End(SmallCaps),
68//!     Start(Subscript),
69//!     Text("Subscript text".into()),
70//!     End(Subscript),
71//!     Start(Superscript),
72//!     Text("Superscript text".into()),
73//!     End(Superscript),
74//!     End(Paragraph)
75//! ];
76//!  assert_eq!(parsed, expected);
77//! ```
78//!
79//! However, markdown syntax is also extended slightly,
80//! to allow wrapping a span of alphanumeric text in `^` to indicate superscript
81//! and in `~` to indicate subscript: `25^th^ July`, `H~2~O`.
82//!
83//! ## Multipara footnotes
84//!
85//! ```
86//! use extended_pulldown::Parser;
87//! use extended_pulldown::Event::*;
88//! use extended_pulldown::Tag::*;
89//! use pulldown_cmark::CodeBlockKind::Indented;
90//! let text = "Hello World[^footnote]\n\n[^footnote]:\n\tA footnote\n\n\tIn *multiple* pieces";
91//! let output = Parser::new(text)
92//!     .collect::<Vec<_>>();
93//! let pulldown_output = vec![
94//!     Start(Paragraph),
95//!     Text("Hello World".into()),
96//!     FootnoteReference("footnote".into()),
97//!     End(Paragraph),
98//!     Start(FootnoteDefinition("footnote".into())),
99//!     Start(CodeBlock(Indented)),
100//!     Text("A footnote\n\n".into()),
101//!     Text("In *multiple* pieces".into()),
102//!     End(CodeBlock(Indented)),
103//!     End(FootnoteDefinition("footnote".into()))
104//! ];
105//! let extended_pulldown_output = vec![
106//!     Start(Paragraph),
107//!     Text("Hello World".into()),
108//!     FootnoteReference("footnote".into()),
109//!     End(Paragraph),
110//!     Start(FootnoteDefinition("footnote".into())),
111//!     Start(Paragraph),
112//!     Text("A footnote".into()),
113//!     End(Paragraph),
114//!     Start(Paragraph),
115//!     Text("In ".into()),
116//!     Start(Emphasis),
117//!     Text("multiple".into()),
118//!     End(Emphasis),
119//!     Text(" pieces".into()),
120//!     End(Paragraph),
121//!     End(FootnoteDefinition("footnote".into()))
122//! ];
123//! assert!(output != pulldown_output);
124//! assert_eq!(output, extended_pulldown_output);
125//! ```
126//!
127//! ## Flattening footnotes
128//!
129//! ```
130//! use extended_pulldown::Event::*;
131//! use extended_pulldown::Tag;
132//!
133//! let events = vec![
134//!   Start(Tag::Paragraph),
135//!   Text("Hello".into()),
136//!   FootnoteReference("1".into()),
137//!  End(Tag::Paragraph),
138//!   Start(Tag::FootnoteDefinition("1".into())),
139//!   Start(Tag::Paragraph),
140//!   Text("World".into()),
141//!  End(Tag::Paragraph),
142//!   End(Tag::FootnoteDefinition("1".into())),
143//! ];
144//!
145//! let flattened = extended_pulldown::flatten_footnotes(events);
146//! let expected = vec![
147//!   Start(Tag::Paragraph),
148//!   Text("Hello".into()),
149//!   Start(Tag::FlattenedFootnote),
150//!   Text("World".into()),
151//!   End(Tag::FlattenedFootnote),
152//!   End(Tag::Paragraph)
153//!];
154//!
155//! assert_eq!(flattened, expected);
156//! ```
157//!
158//#![deny(dead_code)]
159#![deny(unreachable_patterns)]
160#![deny(unused_extern_crates)]
161#![deny(unused_imports)]
162#![deny(unused_qualifications)]
163#![deny(clippy::all)]
164#![deny(missing_docs)]
165#![deny(variant_size_differences)]
166
167use pulldown_cmark::Event as PulldownEvent;
168use pulldown_cmark::Options as PulldownOptions;
169use pulldown_cmark::Tag as PulldownTag;
170use pulldown_cmark::{Alignment, LinkType};
171pub use pulldown_cmark::{CodeBlockKind, CowStr, InlineStr};
172use std::collections::HashMap;
173mod parsing;
174mod quotes;
175pub use parsing::{InlineParser, Parser};
176use std::convert::TryFrom;
177mod sub_and_superscript;
178
/// Options for rendering
///
/// All fields are private; a value is obtained through the `Default`
/// implementation.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Options {
    /// Include footnotes
    enable_footnotes: bool,
    /// Replace punctuation and use curly quotes
    /// instead of straight
    smarten: bool,
    // GFM tables are not currently exposed:
    //enable_tables: bool,
    // GFM tasklists are not currently exposed:
    //enable_tasklists: bool,
    /// Include strikethrough
    enable_strikethrough: bool,
}
194
195impl Default for Options {
196    fn default() -> Self {
197        Options {
198            enable_footnotes: true,
199            smarten: true,
200            //enable_tables: false,
201            //enable_tasklists: false,
202            enable_strikethrough: false,
203        }
204    }
205}
206
207impl From<Options> for PulldownOptions {
208    fn from(src: Options) -> Self {
209        let mut options = PulldownOptions::empty();
210        if src.enable_strikethrough {
211            options.insert(PulldownOptions::ENABLE_STRIKETHROUGH);
212        }
213        if src.enable_footnotes {
214            options.insert(PulldownOptions::ENABLE_FOOTNOTES);
215        }
216        // if src.enable_tables {
217        // 	options.insert(PulldownOptions::ENABLE_TABLES);
218        // }
219        // if src.enable_tasklists {
220        // 	options.insert(PulldownOptions::ENABLE_TASKLISTS);
221        // }
222        options
223    }
224}
225
226/// A markdown event
227#[derive(Debug, Clone, PartialEq)]
228pub enum Event<'a> {
229    /// Start of a tagged element. Events that are yielded after this event and before its corresponding End event are inside this element. Start and end events are guaranteed to be balanced.
230    Start(Tag<'a>),
231    /// End of a tagged element
232    End(Tag<'a>),
233    /// Text
234    Text(CowStr<'a>),
235    /// Inline code
236    Code(CowStr<'a>),
237    /// Reference to a footnote
238    FootnoteReference(CowStr<'a>),
239    /// A soft line break ('\n')
240    SoftBreak,
241    /// A hard line break, corresponding to `<br/>` in html
242    HardBreak,
243    /// A horizontal rule
244    Rule,
245    /// A html node
246    Html(CowStr<'a>),
247    /// A tasklist marker, rendered as a checkbox in html;
248    /// an inner value of true indicates that it is checked.
249    TaskListMarker(bool),
250}
251
252impl<'a> From<Event<'a>> for PulldownEvent<'a> {
253    fn from(event: Event<'a>) -> Self {
254        match event {
255            Event::Start(Tag::Sans) => PulldownEvent::Html("<span class=\"sans\">".into()),
256            Event::Start(Tag::Centred) => PulldownEvent::Html("<span class=\"centred\">".into()),
257            Event::Start(Tag::SmallCaps) => {
258                PulldownEvent::Html("<span class=\"smallcaps\">".into())
259            }
260            Event::Start(Tag::RightAligned) => {
261                PulldownEvent::Html("<span class=\"right-aligned\">".into())
262            }
263            Event::Start(Tag::Superscript) => {
264                PulldownEvent::Html("<span class=\"superscript\">".into())
265            }
266            Event::Start(Tag::Subscript) => {
267                PulldownEvent::Html("<span class=\"subscript\">".into())
268            }
269            Event::End(Tag::Sans) => PulldownEvent::Html("</span>".into()),
270            Event::End(Tag::Centred) => PulldownEvent::Html("</span>".into()),
271            Event::End(Tag::SmallCaps) => PulldownEvent::Html("</span>".into()),
272            Event::End(Tag::RightAligned) => PulldownEvent::Html("</span>".into()),
273            Event::End(Tag::Superscript) => PulldownEvent::Html("</span>".into()),
274            Event::End(Tag::Subscript) => PulldownEvent::Html("</span>".into()),
275            Event::Start(t) => PulldownEvent::Start(PulldownTag::try_from(t).unwrap()),
276            Event::End(t) => PulldownEvent::End(PulldownTag::try_from(t).unwrap()),
277            Event::Text(t) => PulldownEvent::Text(t),
278            Event::Code(c) => PulldownEvent::Code(c),
279            Event::FootnoteReference(f) => PulldownEvent::FootnoteReference(f),
280            Event::SoftBreak => PulldownEvent::SoftBreak,
281            Event::HardBreak => PulldownEvent::HardBreak,
282            Event::Rule => PulldownEvent::Rule,
283            Event::Html(h) => PulldownEvent::Html(h),
284            Event::TaskListMarker(b) => PulldownEvent::TaskListMarker(b),
285        }
286    }
287}
288
289// note that this is not a nice complete transition;
290// it's just a rough conversion, so for example, html
291// won't be parsed into spans, quotations are left ambiguous, etc
292impl<'a> From<PulldownEvent<'a>> for Event<'a> {
293    fn from(src: PulldownEvent<'a>) -> Event<'a> {
294        match src {
295            PulldownEvent::Start(t) => Event::Start(t.into()),
296            PulldownEvent::End(t) => Event::End(t.into()),
297            PulldownEvent::Text(t) => Event::Text(t),
298            PulldownEvent::Code(t) => Event::Code(t),
299            PulldownEvent::Html(t) => Event::Html(t),
300            PulldownEvent::FootnoteReference(t) => Event::FootnoteReference(t),
301            PulldownEvent::SoftBreak => Event::SoftBreak,
302            PulldownEvent::HardBreak => Event::HardBreak,
303            PulldownEvent::Rule => Event::Rule,
304            PulldownEvent::TaskListMarker(t) => Event::TaskListMarker(t),
305        }
306    }
307}
308
309impl<'a> From<PulldownTag<'a>> for Tag<'a> {
310    fn from(src: PulldownTag<'a>) -> Tag<'a> {
311        match src {
312            PulldownTag::Paragraph => Tag::Paragraph,
313            PulldownTag::Heading(x) => Tag::Heading(x),
314            PulldownTag::BlockQuote => Tag::BlockQuote,
315            PulldownTag::CodeBlock(x) => Tag::CodeBlock(x),
316            PulldownTag::List(x) => Tag::List(x),
317            PulldownTag::Item => Tag::Item,
318            PulldownTag::FootnoteDefinition(x) => Tag::FootnoteDefinition(x),
319            PulldownTag::Table(x) => Tag::Table(x),
320            PulldownTag::TableHead => Tag::TableHead,
321            PulldownTag::TableRow => Tag::TableRow,
322            PulldownTag::TableCell => Tag::TableCell,
323            PulldownTag::Emphasis => Tag::Emphasis,
324            PulldownTag::Strong => Tag::Strong,
325            PulldownTag::Link(a, b, c) => Tag::Link(a, b, c),
326            PulldownTag::Image(a, b, c) => Tag::Image(a, b, c),
327            PulldownTag::Strikethrough => Tag::Strikethrough,
328        }
329    }
330}
331
332impl<'a> TryFrom<Tag<'a>> for PulldownTag<'a> {
333    type Error = ();
334    fn try_from(src: Tag<'a>) -> Result<Self, Self::Error> {
335        match src {
336            Tag::Paragraph => Ok(PulldownTag::Paragraph),
337            Tag::Heading(x) => Ok(PulldownTag::Heading(x)),
338            Tag::BlockQuote => Ok(PulldownTag::BlockQuote),
339            Tag::BlockQuotation => Ok(PulldownTag::BlockQuote),
340            Tag::CodeBlock(x) => Ok(PulldownTag::CodeBlock(x)),
341            Tag::List(x) => Ok(PulldownTag::List(x)),
342            Tag::Item => Ok(PulldownTag::Item),
343            Tag::FootnoteDefinition(x) => Ok(PulldownTag::FootnoteDefinition(x)),
344            Tag::Table(x) => Ok(PulldownTag::Table(x)),
345            Tag::TableHead => Ok(PulldownTag::TableHead),
346            Tag::TableRow => Ok(PulldownTag::TableRow),
347            Tag::TableCell => Ok(PulldownTag::TableCell),
348            Tag::Emphasis => Ok(PulldownTag::Emphasis),
349            Tag::Strong => Ok(PulldownTag::Strong),
350            Tag::Link(a, b, c) => Ok(PulldownTag::Link(a, b, c)),
351            Tag::Image(a, b, c) => Ok(PulldownTag::Image(a, b, c)),
352            _ => Err(()),
353        }
354    }
355}
356
357/// A tag containing other events
358#[derive(Debug, Clone, PartialEq)]
359pub enum Tag<'a> {
360    /// A paragraph of text and other inline elements
361    Paragraph,
362    /// A heading. The field indicates the level of the heading.
363    Heading(u32),
364    /// A block quote to be rendered as a `quote` in latex
365    BlockQuote,
366    /// A block quote to be rendered as a `quotation` in latex
367    BlockQuotation,
368    /// A code block
369    CodeBlock(CodeBlockKind<'a>),
370    /// A list. If the list is ordered the field indicates the number of the first item. Contains only list items.
371    List(Option<u64>),
372    /// A list item
373    Item,
374    /// The definition of a footnote
375    FootnoteDefinition(CowStr<'a>),
376    /// A table. Contains a vector describing the text-alignment for each of its columns.
377    Table(Vec<Alignment>),
378    /// A table header. Contains only `TableRows`. Note that the table body starts immediately after the closure of the `TableHead` tag. There is no `TableBody` tag.
379    TableHead,
380    /// A table row. Is used both for header rows as body rows. Contains only `TableCells`.
381    TableRow,
382    /// An individual table cell
383    TableCell,
384    /// Emphasised text
385    Emphasis,
386    /// Strong (bold) text
387    Strong,
388    /// An image. The first field is the link type, the second the destination URL and the third is a title.
389    Link(LinkType, CowStr<'a>, CowStr<'a>),
390    /// A link. The first field is the link type, the second the destination URL and the third is a title.
391    Image(LinkType, CowStr<'a>, CowStr<'a>),
392    /// Struck through text
393    Strikethrough,
394    // additions begin here
395    /// Sans text
396    Sans,
397    /// Centred text
398    Centred,
399    /// Text in small caps
400    SmallCaps,
401    /// Text that is aligned right
402    RightAligned,
403    /// Superscript text
404    Superscript,
405    /// Subscript text
406    Subscript,
407    /// A flattened footnote produced by `flatten_footnotes`
408    FlattenedFootnote,
409    /// A paragraph without an initial indent
410    UnindentedParagraph,
411}
412
/// Implemented by events which can report whether they reset quotes
trait BoundaryMarker {
    /// Returns `true` when this value resets quotes
    fn resets_quotes(&self) -> bool;
}
416
417impl BoundaryMarker for PulldownEvent<'_> {
418    /// whether this event means that any quotes must necessarily be broken
419    fn resets_quotes(&self) -> bool {
420        use PulldownEvent::*;
421        match self {
422            Rule => true,
423            Text(_) => false,
424            Code(_) | Html(_) | FootnoteReference(_) => false,
425            SoftBreak | HardBreak | TaskListMarker(_) => false,
426            Start(PulldownTag::Emphasis) => false,
427            Start(PulldownTag::Strong) => false,
428            Start(PulldownTag::Link(_, _, _)) => false,
429            Start(PulldownTag::Image(_, _, _)) => false,
430            End(PulldownTag::Emphasis) => false,
431            End(PulldownTag::Strong) => false,
432            End(PulldownTag::Link(_, _, _)) => false,
433            End(PulldownTag::Image(_, _, _)) => false,
434            Start(_) => true,
435            End(_) => true,
436        }
437    }
438}
439
/// Convert a markdown value into one with a static lifetime, so that it is no
/// longer tied to the lifetime of the str it was produced from
pub trait MakeStatic {
    /// The result of the conversion. This should simply be the implementing
    /// type with a static lifetime, but has to be spelt out as an associated
    /// type to work around language limitations
    type AsStatic;
    /// Consume this value and return its static counterpart
    fn into_static(self) -> Self::AsStatic;
}
448
449impl MakeStatic for CowStr<'_> {
450    type AsStatic = CowStr<'static>;
451    fn into_static(self) -> Self::AsStatic {
452        match self {
453            CowStr::Boxed(b) => CowStr::Boxed(b),
454            CowStr::Inlined(i) => CowStr::Inlined(i),
455            CowStr::Borrowed(s) => s.to_string().into(),
456        }
457    }
458}
459
460impl<'a> MakeStatic for PulldownTag<'a> {
461    type AsStatic = PulldownTag<'static>;
462    fn into_static(self) -> Self::AsStatic {
463        use PulldownTag::*;
464
465        match self {
466            CodeBlock(x) => CodeBlock(x.into_static()),
467            List(x) => List(x),
468            Item => Item,
469            FootnoteDefinition(x) => FootnoteDefinition(x.into_static()),
470            Table(x) => Table(x),
471            TableHead => TableHead,
472            TableRow => TableRow,
473            TableCell => TableCell,
474            Emphasis => Emphasis,
475            Strong => Strong,
476            Link(a, b, c) => Link(a, b.into_static(), c.into_static()),
477            Image(a, b, c) => Image(a, b.into_static(), c.into_static()),
478            Paragraph => Paragraph,
479            Heading(x) => Heading(x),
480            BlockQuote => BlockQuote,
481            Strikethrough => Strikethrough,
482        }
483    }
484}
485
486impl MakeStatic for PulldownEvent<'_> {
487    type AsStatic = PulldownEvent<'static>;
488    fn into_static(self) -> Self::AsStatic {
489        use PulldownEvent::*;
490        match self {
491            Text(t) => Text(t.into_static()),
492            Start(t) => Start(t.into_static()),
493            End(t) => End(t.into_static()),
494            Code(c) => Code(c.into_static()),
495            FootnoteReference(f) => FootnoteReference(f.into_static()),
496            SoftBreak => SoftBreak,
497            HardBreak => HardBreak,
498            Rule => Rule,
499            Html(h) => Html(h.into_static()),
500            TaskListMarker(b) => TaskListMarker(b),
501        }
502    }
503}
504
505impl MakeStatic for CodeBlockKind<'_> {
506    type AsStatic = CodeBlockKind<'static>;
507    fn into_static(self) -> Self::AsStatic {
508        match self {
509            CodeBlockKind::Indented => CodeBlockKind::Indented,
510            CodeBlockKind::Fenced(l) => CodeBlockKind::Fenced(l.into_static()),
511        }
512    }
513}
514
515impl MakeStatic for Tag<'_> {
516    type AsStatic = Tag<'static>;
517    fn into_static(self) -> Self::AsStatic {
518        use Tag::*;
519        match self {
520            CodeBlock(x) => CodeBlock(x.into_static()),
521            List(x) => List(x),
522            Item => Item,
523            FootnoteDefinition(x) => FootnoteDefinition(x.into_static()),
524            Table(x) => Table(x),
525            TableHead => TableHead,
526            TableRow => TableRow,
527            TableCell => TableCell,
528            Emphasis => Emphasis,
529            Strong => Strong,
530            Link(a, b, c) => Link(a, b.into_static(), c.into_static()),
531            Image(a, b, c) => Image(a, b.into_static(), c.into_static()),
532            Sans => Sans,
533            Centred => Centred,
534            SmallCaps => SmallCaps,
535            RightAligned => RightAligned,
536            Superscript => Superscript,
537            Subscript => Subscript,
538            FlattenedFootnote => FlattenedFootnote,
539            Paragraph => Paragraph,
540            Heading(x) => Heading(x),
541            BlockQuote => BlockQuote,
542            BlockQuotation => BlockQuotation,
543            Strikethrough => Strikethrough,
544            UnindentedParagraph => UnindentedParagraph,
545        }
546    }
547}
548
549impl MakeStatic for Event<'_> {
550    type AsStatic = Event<'static>;
551    fn into_static(self) -> Self::AsStatic {
552        use Event::*;
553        match self {
554            Text(t) => Text(t.into_static()),
555            Start(t) => Start(t.into_static()),
556            End(t) => End(t.into_static()),
557            Code(c) => Code(c.into_static()),
558            FootnoteReference(f) => FootnoteReference(f.into_static()),
559            SoftBreak => SoftBreak,
560            HardBreak => HardBreak,
561            Rule => Rule,
562            Html(h) => Html(h.into_static()),
563            TaskListMarker(b) => TaskListMarker(b),
564        }
565    }
566}
567
568/// Replace `Event::FootnoteReference(f)` and a seperate definition `Event::Start(Tag::FootnoteDefinition(f))...Event::End(Tag::FootnoteDefinition(f))`
569/// with (at the point where `Event::FootnoteReference(f)` was) `Event::Start(Tag::FlattenedFootnote)...Event::End(Tag::FlattenedFootnote)`
570///
571/// If a footnote reference has no definition, an empty string of text will be inserted instead.
572/// # Example
573///
574/// ```
575/// use extended_pulldown::Event::*;
576/// use extended_pulldown::Tag;
577///
578/// let events = vec![
579///   Start(Tag::Paragraph),
580///   Text("Hello".into()),
581///   FootnoteReference("1".into()),
582///   End(Tag::Paragraph),
583///   Start(Tag::FootnoteDefinition("1".into())),
584///   Start(Tag::Paragraph),
585///   Text("World".into()),
586///   End(Tag::Paragraph),
587///   End(Tag::FootnoteDefinition("1".into())),
588/// ];
589///
590/// let flattened = extended_pulldown::flatten_footnotes(events);
591/// let expected = vec![
592///   Start(Tag::Paragraph),
593///   Text("Hello".into()),
594///   Start(Tag::FlattenedFootnote),
595///   Text("World".into()),
596///   End(Tag::FlattenedFootnote),
597///   End(Tag::Paragraph)
598///];
599///
600/// assert_eq!(flattened, expected);
601/// ```
602pub fn flatten_footnotes<'a, I>(src: I) -> Vec<Event<'a>>
603where
604    I: IntoIterator<Item = Event<'a>>,
605{
606    let mut non_footnotes = Vec::new();
607    let mut footnotes = HashMap::new();
608
609    let mut definitions_len = 0;
610
611    let mut fb = Vec::new();
612    let mut in_footnote = false;
613    for event in src {
614        match event {
615            Event::Start(Tag::FootnoteDefinition(_)) => {
616                in_footnote = true;
617            }
618            Event::End(Tag::FootnoteDefinition(d)) => {
619                in_footnote = false;
620                let mut definition = std::mem::take(&mut fb);
621                if let (Some(Event::Start(Tag::Paragraph)), Some(Event::End(Tag::Paragraph))) =
622                    (definition.first(), definition.last())
623                {
624                    definition.remove(0);
625                    definition.pop();
626                }
627                definitions_len += definition.len() + 1;
628                footnotes.insert(d, definition);
629            }
630            other => {
631                if in_footnote {
632                    fb.push(other);
633                } else {
634                    non_footnotes.push(other);
635                }
636            }
637        }
638    }
639
640    let mut out = Vec::with_capacity(non_footnotes.len() + definitions_len);
641    for event in non_footnotes.into_iter() {
642        match event {
643            Event::FootnoteReference(f) => match footnotes.remove(&f) {
644                Some(mut definition) => {
645                    out.push(Event::Start(Tag::FlattenedFootnote));
646                    out.append(&mut definition);
647                    out.push(Event::End(Tag::FlattenedFootnote));
648                }
649                None => {
650                    out.push(Event::Start(Tag::FlattenedFootnote));
651                    out.push(Event::Text("".into()));
652                    out.push(Event::End(Tag::FlattenedFootnote));
653                }
654            },
655            other => out.push(other),
656        }
657    }
658    out
659}
660
661#[cfg(test)]
662mod tests {
663    use super::*;
664
665    #[test]
666    fn test_cross_boundaries() {
667        let markdown = "Pre 'Hello\nWorld' Post";
668        let smart = smart_markdown(markdown);
669        assert_eq!(smart, "Pre ‘Hello\nWorld’ Post");
670        let a = "She wrote: 'It will be delightful. I am ready to do
671anything, anything for you. It is a glorious idea. I know the wife of a
672very high personage in the Administration, and also a man who has lots
673of influence with,' etc.";
674        let smart = smart_markdown(a);
675        let expected = "She wrote: ‘It will be delightful. I am ready to do
676anything, anything for you. It is a glorious idea. I know the wife of a
677very high personage in the Administration, and also a man who has lots
678of influence with,’ etc.";
679        assert_eq!(smart, expected);
680        let b = "'When Mr. Kurtz,' I continued, severely, 'is
681General Manager, you won't have the opportunity.'";
682        let smart = smart_markdown(b);
683        let expected = "‘When Mr. Kurtz,’ I continued, severely, ‘is
684General Manager, you won't have the opportunity.’";
685        assert_eq!(smart, expected);
686        let c = "A blinding sunlight drowned all this at times
687in a sudden recrudescence of glare. 'There's your Company's station,'
688said the Swede, pointing to three wooden barrack-like structures on the
689rocky slope. 'I will send your things up. Four boxes did you say? So.
690Farewell.'";
691        let smart = smart_markdown(c);
692        let expected = "A blinding sunlight drowned all this at times
693in a sudden recrudescence of glare. ‘There's your Company's station,’
694said the Swede, pointing to three wooden barrack-like structures on the
695rocky slope. ‘I will send your things up. Four boxes did you say? So.
696Farewell.’";
697        assert_eq!(smart, expected);
698    }
699
700    #[test]
701    fn prev_integration_test_failures() {
702        let a = "then--you see--I felt somehow
703I must get there by hook or by crook. The men said
704'My dear fellow,' and did nothing.";
705        let smart = smart_markdown(a);
706        let expected = "then–you see–I felt somehow
707I must get there by hook or by crook. The men said
708‘My dear fellow,’ and did nothing.";
709        assert_eq!(smart, expected);
710        let a = "He lifted a warning forefinger....
711'*Du calme, du calme*.'";
712        let expected = "He lifted a warning forefinger….
713‘*Du calme, du calme*.’";
714        let smart = smart_markdown(a);
715        assert_eq!(smart, expected);
716
717        let a = "'catch 'im. Give 'im to us.'";
718        let expected = "‘catch 'im. Give 'im to us.’";
719        let smart = smart_markdown(a);
720        assert_eq!(smart, expected);
721    }
722
723    /// smarten markdown by turning a handful of latex glyphs into
724    /// unicode characters, and by attempting to replace straight with curly quotes,
725    /// with a very dodgy writer to turn them back into a string
726    fn smart_markdown(markdown: &str) -> String {
727        let parser = Parser::new(markdown).map(PulldownEvent::from);
728        let mut out = String::new();
729
730        use PulldownEvent::*;
731
732        for event in parser {
733            match event {
734                Text(t) => out.push_str(&t),
735                Start(PulldownTag::Paragraph) => {
736                    if !out.is_empty() {
737                        out.push('\n');
738                    }
739                }
740                End(PulldownTag::Paragraph) => {
741                    out.push_str("\n");
742                }
743                Start(PulldownTag::Emphasis) | End(PulldownTag::Emphasis) => {
744                    out.push('*');
745                }
746                Start(PulldownTag::CodeBlock(_)) => out.push_str("\n````\n"),
747                End(PulldownTag::CodeBlock(_)) => out.push_str("````\n"),
748                SoftBreak => out.push_str("\n"),
749                e => {
750                    println!("{:?}", e);
751                    panic!()
752                }
753            }
754        }
755
756        out.trim_end().to_string()
757    }
758
759    #[test]
760    fn tricky_quotes() {
761        let markdown = "'I'd like to see some of that 70's style again,' Patrick O'Postrophe said, 'even though it's '20.'";
762        let smart = smart_markdown(markdown);
763        assert_eq!(smart, "‘I'd like to see some of that 70's style again,’ Patrick O'Postrophe said, ‘even though it's '20.’");
764
765        let a = smart_markdown("'Hmm. 'Tis all one, Robert Post's child.'");
766        let c = smart_markdown("'Gossip on Forsyte 'Change was not restrained.'");
767
768        assert_eq!(a, "‘Hmm. 'Tis all one, Robert Post's child.’");
769        assert_eq!(c, "‘Gossip on Forsyte 'Change was not restrained.’");
770    }
771
772    #[test]
773    fn forgotten_closing_quote_does_not_extend_over_para_boundaries() {
774        let with_break = "'He's so meticulous\n\nThere was a pause... 'If you're sure.'";
775        let smart = smart_markdown(with_break);
776        assert_eq!(
777            smart,
778            "‘He's so meticulous\n\nThere was a pause… ‘If you're sure.’"
779        );
780    }
781
782    #[test]
783    fn galsworthy() {
784        let markdown = "'E'en so many years later, 'tis an item of gossip on Forsyte 'Change that I'd marry 'im yet.'";
785        let smart = smart_markdown(markdown);
786        assert_eq!(smart, "‘E'en so many years later, 'tis an item of gossip on Forsyte 'Change that I'd marry 'im yet.’");
787    }
788
789    #[test]
790    fn leave_verbatim_alone() {
791        let markdown = "'Hello World' is a traditional first program. Here it is in Python:\n\n```\nprint(\"Hello World\")\n```\n\nThat's nice.";
792        let smart = smart_markdown(markdown);
793        assert_eq!(smart, "‘Hello World’ is a traditional first program. Here it is in Python:\n\n````\nprint(\"Hello World\")\n````\n\nThat's nice.");
794    }
795
796    #[test]
797    fn multi_para_open_quote() {
798        // lots of old texts use a single double quote at the opening of a paragraph for reported speech. Check that we don't close such:
799        let text = "\"A\n\n\"B";
800        let smart = smart_markdown(text);
801        assert_eq!(smart, "“A\n\n“B");
802    }
803
/// Double quotes nested inside single quotes, together with apostrophes,
/// em-dashes written as `---` and inline emphasis, are all handled in one
/// pass: quotes are curled, dashes substituted, apostrophes and emphasis
/// markers left as written.
#[test]
fn double_and_single_confluence() {
    let input = "'It's---after all---the season, e'en if the situation is a *little* complicated,' he said. 'My mother always said \"Say something nice if you can.\"'";
    let expected = "‘It's—after all—the season, e'en if the situation is a *little* complicated,’ he said. ‘My mother always said “Say something nice if you can.”’";
    let actual = smart_markdown(input);
    assert_eq!(actual, expected);
}
811
/// Two paragraphs of dialogue with nested double quotes and a spaced `---`:
/// quotes are curled per paragraph and the dash becomes an em-dash.
#[test]
fn quote_transformation() {
    let input = "'This isn't that clever,' she said. 'No, \"Real cleverness would understand semantics, not stacks\" --- as Hiram Maxim didn't quite get around to saying.'\n\n'It'll just have to do,' he replied.";
    let expected = "‘This isn't that clever,’ she said. ‘No, “Real cleverness would understand semantics, not stacks” — as Hiram Maxim didn't quite get around to saying.’\n\n‘It'll just have to do,’ he replied.";
    let actual = smart_markdown(input);
    assert_eq!(expected, actual);
}
818
/// The simplest case: one pair of single quotes around a phrase is curled.
#[test]
fn simple_as() {
    let input = "'Hello World,' he said.";
    assert_eq!(smart_markdown(input), "‘Hello World,’ he said.");
}
825
/// An apostrophe directly after the opening quote (`''Tis`) must stay a
/// straight apostrophe while the quote in front of it is curled.
#[test]
fn apostrophe_after_opening_quote() {
    let input = "''Tis after all the season, e'en if the situation is a *little* complicated,' he said.";
    let expected = "‘'Tis after all the season, e'en if the situation is a *little* complicated,’ he said.";
    let actual = smart_markdown(input);
    assert_eq!(expected, actual);
}
832
/// Html spans with the classes `sans` and `smallcaps` are turned into
/// `Sans` and `SmallCaps` start/end tags around their text.
#[test]
fn special_spans() {
    use Event::*;
    use Tag::*;

    let markup = "<span class=\"sans\">Hello</span> <span class=\"smallcaps\">World</span>";
    let events: Vec<_> = Parser::new(markup).collect();

    let wanted = vec![
        Start(Paragraph),
        // first span
        Start(Sans),
        Text("Hello".into()),
        End(Sans),
        // the space separating the two spans
        Text(" ".into()),
        // second span
        Start(SmallCaps),
        Text("World".into()),
        End(SmallCaps),
        End(Paragraph),
    ];

    assert_eq!(events, wanted);
}
854
/// A span with class `inner` nested inside a `sans` span: the outer span
/// becomes a `Sans` tag, while the inner span's opening and closing markup is
/// expected to be emitted as raw `Html` events around its text.
#[test]
fn stacked_special_spans() {
    use Event::*;
    use Tag::*;

    let markup = "<span class=\"sans\"><span class=\"inner\">Hello's</span></span> <span class=\"smallcaps\">World</span>";
    let events: Vec<_> = Parser::new(markup).collect();

    let wanted = vec![
        Start(Paragraph),
        // outer span, wrapping the raw inner span
        Start(Sans),
        Html("<span class=\"inner\">".into()),
        Text("Hello's".into()),
        Html("</span>".into()),
        End(Sans),
        Text(" ".into()),
        // sibling span
        Start(SmallCaps),
        Text("World".into()),
        End(SmallCaps),
        End(Paragraph),
    ];

    assert_eq!(events, wanted);
}
876
/// A footnote definition whose body is written as tab-indented lines
/// separated by a blank line is expected to yield two paragraphs inside a
/// single `FootnoteDefinition`, with inline emphasis parsed in the second.
#[test]
fn multi_para_footnotes() {
    use Event::*;
    use Tag::*;

    let markdown =
        "Hello World[^footnote]\n\n[^footnote]:\n\tA footnote\n\n\tIn *multiple* pieces";
    let events: Vec<_> = Parser::new(markdown).collect();

    let wanted = vec![
        // body text carrying the reference
        Start(Paragraph),
        Text("Hello World".into()),
        FootnoteReference("footnote".into()),
        End(Paragraph),
        // the definition, split into two paragraphs
        Start(FootnoteDefinition("footnote".into())),
        Start(Paragraph),
        Text("A footnote".into()),
        End(Paragraph),
        Start(Paragraph),
        Text("In ".into()),
        Start(Emphasis),
        Text("multiple".into()),
        End(Emphasis),
        Text(" pieces".into()),
        End(Paragraph),
        End(FootnoteDefinition("footnote".into())),
    ];

    assert_eq!(events, wanted);
}
905
/// Caret-delimited superscripts: `^bc^` and `^hello^` become `Superscript`
/// tags, whereas an empty pair (`^^`), a pair containing a space and an
/// unpaired caret are all left as literal text. Quotes are smartened in both
/// inputs. (Despite the name, only superscripts are exercised here.)
#[test]
fn super_and_sub() {
    use Event::*;
    use Tag::*;

    let well_formed: Vec<_> = Parser::new("'Quoted.' a^bc^d a^hello^").collect();
    let malformed: Vec<_> = Parser::new("'Quoted.' a^^ a^With space^ unpaired^").collect();

    // Nothing in the malformed input qualifies, so it stays one text run.
    let wanted_malformed = vec![
        Start(Paragraph),
        Text("‘Quoted.’ a^^ a^With space^ unpaired^".into()),
        End(Paragraph),
    ];

    let wanted_well_formed = vec![
        Start(Paragraph),
        Text("‘Quoted.’ a".into()),
        Start(Superscript),
        Text("bc".into()),
        End(Superscript),
        Text("d a".into()),
        Start(Superscript),
        Text("hello".into()),
        End(Superscript),
        End(Paragraph),
    ];

    assert_eq!(malformed, wanted_malformed);
    assert_eq!(well_formed, wanted_well_formed);
}
934
/// Block quotes are tagged by paragraph count: a quote holding a single
/// paragraph is expected as `BlockQuote`, one holding two paragraphs as
/// `BlockQuotation`. The `...` inside the quote is still turned into an
/// ellipsis.
#[test]
fn blockquotes() {
    use Event::*;
    use Tag::*;

    let markdown =
        "This checks quotes.\n\n> Single para\n\nNow multi:\n\n> Para 1...\n>\n> Para 2";
    let events: Vec<_> = Parser::new(markdown).collect();

    let wanted = vec![
        Start(Paragraph),
        Text(CowStr::Borrowed("This checks quotes.")),
        End(Paragraph),
        // single-paragraph quote
        Start(BlockQuote),
        Start(Paragraph),
        Text(CowStr::Borrowed("Single para")),
        End(Paragraph),
        End(BlockQuote),
        Start(Paragraph),
        Text(CowStr::Borrowed("Now multi:")),
        End(Paragraph),
        // two-paragraph quote
        Start(BlockQuotation),
        Start(Paragraph),
        Text(CowStr::Boxed("Para 1…".into())),
        End(Paragraph),
        Start(Paragraph),
        Text(CowStr::Borrowed("Para 2")),
        End(Paragraph),
        End(BlockQuotation),
    ];

    assert_eq!(events, wanted);
}
967
/// Checks the exact event stream for a paragraph that contains emphasis
/// followed by text with a superscript, so that no event around the emphasis
/// goes missing.
///
/// Events are compared one by one so that a failure points at the first
/// differing event, after first checking that both sequences have the same
/// length.
#[test]
fn emphasis_drop() {
    use Event::*;
    use Tag::*;

    let text =
        Parser::new("This has *emphasis* (among the 1^st^ of its kind)").collect::<Vec<_>>();

    let expected = vec![
        Start(Paragraph),
        Text(CowStr::Borrowed("This has ")),
        Start(Emphasis),
        Text(CowStr::Borrowed("emphasis")),
        End(Emphasis),
        Text(CowStr::Inlined(' '.into())),
        Text(CowStr::Inlined(
            InlineStr::try_from("(among the 1").unwrap(),
        )),
        Start(Superscript),
        Text(CowStr::Inlined(InlineStr::try_from("st").unwrap())),
        End(Superscript),
        Text(CowStr::Inlined(
            InlineStr::try_from(" of its kind)").unwrap(),
        )),
        End(Paragraph),
    ];

    // `zip` stops at the shorter of the two sequences, so without this check
    // events missing from (or surplus at) the end of the parser output would
    // pass unnoticed.
    assert_eq!(
        text.len(),
        expected.len(),
        "unexpected number of events: {:?}",
        text
    );

    for (received, wanted) in text.into_iter().zip(expected) {
        assert_eq!(received, wanted);
    }
}
1001}