Skip to main content

kas_text/format/
markdown.rs

1// Licensed under the Apache License, Version 2.0 (the "License");
2// you may not use this file except in compliance with the License.
3// You may obtain a copy of the License in the LICENSE-APACHE file or at:
4//     https://www.apache.org/licenses/LICENSE-2.0
5
6//! Markdown parsing
7
8use super::{FontToken, FormattableText};
9use crate::conv::to_u32;
10use crate::fonts::{FamilySelector, FontSelector, FontStyle, FontWeight};
11use crate::{Effect, EffectFlags};
12use pulldown_cmark::{Event, HeadingLevel, Tag, TagEnd};
13use std::fmt::Write;
14use std::iter::FusedIterator;
15use thiserror::Error;
16
/// Markdown parsing errors
#[derive(Error, Debug)]
pub enum Error {
    /// The input uses a Markdown feature which this parser does not handle
    ///
    /// The payload is a short static description of the feature; it is
    /// included in the error message.
    #[error("Not supported by Markdown parser: {0}")]
    NotSupported(&'static str),
}
23
/// Basic Markdown formatter
///
/// Currently this misses several important Markdown features, but may still
/// prove a convenient way of constructing formatted texts.
///
/// Supported:
///
/// -   Text paragraphs
/// -   Code (embedded and blocks); caveat: extra line after code blocks
/// -   Explicit line breaks
/// -   Headings
/// -   Lists (numbered and bulleted); caveat: indentation after first line
/// -   Bold, italic (emphasis), strike-through
///
/// Not supported (parsing fails with [`Error::NotSupported`] when one of
/// these is encountered):
///
/// -   Block quotes
/// -   Footnotes
/// -   HTML
/// -   Horizontal rules
/// -   Images
/// -   Links
/// -   Tables
/// -   Task lists
#[derive(Clone, Debug, Default, PartialEq)]
pub struct Markdown {
    // Text assembled by the parser from the input's text and code events,
    // plus the separators and list markers it inserts
    text: String,
    // Font formatting entries; each `start` is a byte length of `text` taken
    // while parsing
    fmt: Vec<Fmt>,
    // One entry for each change of effect flags between consecutive `fmt`
    // entries
    effects: Vec<Effect>,
}
54
impl Markdown {
    /// Parse the input as Markdown
    ///
    /// Parsing happens immediately. Fonts must be initialized before calling
    /// this method.
    ///
    /// # Errors
    ///
    /// Returns [`Error::NotSupported`] if the input uses a Markdown feature
    /// which the parser does not handle.
    #[inline]
    pub fn new(input: &str) -> Result<Self, Error> {
        parse(input)
    }
}
65
/// Iterator over the font tokens of a [`Markdown`] text
///
/// This is the `FontTokenIter` type of the [`FormattableText`] implementation
/// for [`Markdown`]. It yields one [`FontToken`] per stored formatting entry.
pub struct FontTokenIter<'a> {
    // Index into `fmt` of the next entry to yield
    index: usize,
    // Formatting entries being iterated
    fmt: &'a [Fmt],
    // Font size which is multiplied by each entry's `rel_size`
    base_dpem: f32,
}
71
72impl<'a> FontTokenIter<'a> {
73    fn new(fmt: &'a [Fmt], base_dpem: f32) -> Self {
74        FontTokenIter {
75            index: 0,
76            fmt,
77            base_dpem,
78        }
79    }
80}
81
82impl<'a> Iterator for FontTokenIter<'a> {
83    type Item = FontToken;
84
85    fn next(&mut self) -> Option<FontToken> {
86        if self.index < self.fmt.len() {
87            let fmt = &self.fmt[self.index];
88            self.index += 1;
89            Some(FontToken {
90                start: fmt.start,
91                font: fmt.font,
92                dpem: self.base_dpem * fmt.rel_size,
93            })
94        } else {
95            None
96        }
97    }
98
99    fn size_hint(&self) -> (usize, Option<usize>) {
100        let len = self.fmt.len();
101        (len, Some(len))
102    }
103}
104
// Marker impls. `ExactSizeIterator` takes its length from `size_hint`.
// `FusedIterator` declares that `next` keeps returning `None` once exhausted
// (`next` never moves `index` backwards).
impl<'a> ExactSizeIterator for FontTokenIter<'a> {}
impl<'a> FusedIterator for FontTokenIter<'a> {}
107
108impl FormattableText for Markdown {
109    type FontTokenIter<'a> = FontTokenIter<'a>;
110
111    #[inline]
112    fn as_str(&self) -> &str {
113        &self.text
114    }
115
116    #[inline]
117    fn font_tokens<'a>(&'a self, dpem: f32) -> Self::FontTokenIter<'a> {
118        FontTokenIter::new(&self.fmt, dpem)
119    }
120
121    fn effect_tokens(&self) -> &[Effect] {
122        &self.effects
123    }
124}
125
/// Parse `input` as Markdown into text plus formatting tokens
///
/// Walks the `pulldown_cmark` event stream (strikethrough extension enabled),
/// appending text to the output string and recording a [`Fmt`] entry whenever
/// the active formatting changes. Effect tokens are derived afterwards from
/// the flags of the recorded entries.
///
/// Returns [`Error::NotSupported`] at the first unsupported event or tag.
fn parse(input: &str) -> Result<Markdown, Error> {
    let mut text = String::with_capacity(input.len());
    let mut fmt: Vec<Fmt> = Vec::new();
    // Record the formatting of `item`. When the last recorded entry does not
    // start before `item.start` it is overwritten instead of followed by a
    // new entry.
    let mut set_last = |item: &StackItem| {
        let f = Fmt::new(item);
        if let Some(last) = fmt.last_mut()
            && last.start >= item.start
        {
            *last = f;
            return;
        }
        fmt.push(f);
    };

    // `state` tracks what was last emitted so that separators can be inserted;
    // `stack` holds the enclosing formatting items; `item` is the active one.
    let mut state = State::None;
    let mut stack = Vec::with_capacity(16);
    let mut item = StackItem::default();

    let options = pulldown_cmark::Options::ENABLE_STRIKETHROUGH;
    for ev in pulldown_cmark::Parser::new_ext(input, options) {
        match ev {
            Event::Start(tag) => {
                item.start = to_u32(text.len());
                // `start_tag` returns a new item when the tag changes the
                // formatting: save the current item and switch to the new one.
                if let Some(clone) = item.start_tag(&mut text, &mut state, tag)? {
                    stack.push(item);
                    item = clone;
                    set_last(&item);
                }
            }
            Event::End(tag) => {
                // `end_tag` returns true for the same tag kinds for which
                // `start_tag` returns an item, hence the stack is non-empty
                // here; the restored formatting applies from the current end
                // of the text.
                if item.end_tag(&mut state, tag) {
                    item = stack.pop().unwrap();
                    item.start = to_u32(text.len());
                    set_last(&item);
                }
            }
            Event::Text(part) => {
                state.part(&mut text);
                text.push_str(&part);
            }
            Event::Code(part) => {
                state.part(&mut text);
                item.start = to_u32(text.len());

                // Inline code: a monospace variant of the current formatting
                // covers just the code text...
                let mut item2 = item.clone();
                item2.sel.family = FamilySelector::MONOSPACE;
                set_last(&item2);

                text.push_str(&part);

                // ... after which the current formatting resumes.
                item.start = to_u32(text.len());
                set_last(&item);
            }
            Event::InlineMath(_) | Event::DisplayMath(_) => {
                return Err(Error::NotSupported("math expressions"));
            }
            Event::Html(_) | Event::InlineHtml(_) => {
                return Err(Error::NotSupported("embedded HTML"));
            }
            Event::FootnoteReference(_) => return Err(Error::NotSupported("footnote")),
            Event::SoftBreak => state.soft_break(&mut text),
            Event::HardBreak => state.hard_break(&mut text),
            Event::Rule => return Err(Error::NotSupported("horizontal rule")),
            Event::TaskListMarker(_) => return Err(Error::NotSupported("task list")),
        }
    }

    // Emit one effect token for each change of flags between consecutive
    // formatting entries (starting from the default flags).
    // TODO(opt): don't need to store flags in fmt?
    let mut effects = Vec::new();
    let mut flags = EffectFlags::default();
    for token in &fmt {
        if token.flags != flags {
            effects.push(Effect {
                start: token.start,
                e: 0,
                flags: token.flags,
            });
            flags = token.flags;
        }
    }

    Ok(Markdown { text, fmt, effects })
}
209
/// Tracks what the parser emitted last, to decide which separator (if any)
/// must precede the next piece of output
#[derive(Copy, Clone, Debug, PartialEq)]
enum State {
    /// Initial state: nothing has been emitted
    None,
    /// A block was started but has no content yet
    BlockStart,
    /// A block was ended; following content needs a separator
    BlockEnd,
    /// A list item was started
    ListItem,
    /// Inline content was emitted
    Part,
}

impl State {
    /// Start a new block, separating it from any preceding output by a blank
    /// line
    fn start_block(&mut self, text: &mut String) {
        match *self {
            State::None | State::BlockStart => (),
            State::BlockEnd | State::ListItem | State::Part => text.push_str("\n\n"),
        }
        *self = State::BlockStart;
    }

    /// Mark the current block as ended
    fn end_block(&mut self) {
        *self = State::BlockEnd;
    }

    /// Prepare for inline content; inserts a blank line if a block just ended
    fn part(&mut self, text: &mut String) {
        match *self {
            State::None | State::BlockStart | State::Part | State::ListItem => (),
            State::BlockEnd => text.push_str("\n\n"),
        }
        *self = State::Part;
    }

    /// Start a list item, placing it on a new line unless it is the first
    /// output of the enclosing block
    ///
    /// `State::BlockEnd` is reachable here: an item may follow an item which
    /// contained a block (a paragraph in a loose list, or a nested list). It
    /// must be separated from the preceding text like any other item.
    fn list_item(&mut self, text: &mut String) {
        match *self {
            State::None | State::BlockStart => {
                // An item is only expected directly after its list was started
                debug_assert_eq!(*self, State::BlockStart);
            }
            State::BlockEnd | State::ListItem | State::Part => text.push('\n'),
        }
        *self = State::ListItem;
    }

    /// Emit a soft break as a single space; the state is unchanged
    fn soft_break(&mut self, text: &mut String) {
        text.push(' ');
    }

    /// Emit a hard break as a line break; the state is unchanged
    fn hard_break(&mut self, text: &mut String) {
        text.push('\n');
    }
}
253
/// A formatting entry: the font selection, relative size and effect flags
/// recorded at a position in the parsed text
#[derive(Clone, Debug, PartialEq)]
pub struct Fmt {
    // Byte length of the output text at the point this entry was recorded
    start: u32,
    // Font selector for this entry
    font: FontSelector,
    // Size factor; multiplied by the base size when font tokens are generated
    rel_size: f32,
    // Effect flags for this entry
    flags: EffectFlags,
}
261
262impl Fmt {
263    fn new(item: &StackItem) -> Self {
264        Fmt {
265            start: item.start,
266            font: item.sel,
267            rel_size: item.rel_size,
268            flags: item.flags,
269        }
270    }
271}
272
/// The formatting active at some point during parsing
///
/// The parser keeps enclosing items on a stack and restores them when the
/// corresponding tag ends.
#[derive(Clone, Debug)]
struct StackItem {
    // List numbering: `Some(n)` when the next item of the current list is
    // numbered `n`; `None` for bulleted items
    list: Option<u64>,
    // Byte length of the output text at which this formatting starts
    start: u32,
    // Font selector
    sel: FontSelector,
    // Size factor relative to the base font size
    rel_size: f32,
    // Effect flags
    flags: EffectFlags,
}
281
282impl Default for StackItem {
283    fn default() -> Self {
284        StackItem {
285            list: None,
286            start: 0,
287            sel: Default::default(),
288            rel_size: 1.0,
289            flags: EffectFlags::empty(),
290        }
291    }
292}
293
294impl StackItem {
295    // process a tag; may modify current item and may return new item
296    fn start_tag(
297        &mut self,
298        text: &mut String,
299        state: &mut State,
300        tag: Tag,
301    ) -> Result<Option<Self>, Error> {
302        fn with_clone<F: Fn(&mut StackItem)>(s: &mut StackItem, c: F) -> Option<StackItem> {
303            let mut item = s.clone();
304            c(&mut item);
305            Some(item)
306        }
307
308        Ok(match tag {
309            Tag::Paragraph => {
310                state.start_block(text);
311                None
312            }
313            Tag::Heading { level, .. } => {
314                state.start_block(text);
315                self.start = to_u32(text.len());
316                with_clone(self, |item| {
317                    // CSS sizes: https://www.w3.org/TR/2018/REC-css-fonts-3-20180920/#font-size-prop
318                    item.rel_size = match level {
319                        HeadingLevel::H1 => 2.0 / 1.0,
320                        HeadingLevel::H2 => 3.0 / 2.0,
321                        HeadingLevel::H3 => 6.0 / 5.0,
322                        HeadingLevel::H4 => 1.0,
323                        HeadingLevel::H5 => 8.0 / 9.0,
324                        HeadingLevel::H6 => 3.0 / 5.0,
325                    }
326                })
327            }
328            Tag::CodeBlock(_) => {
329                state.start_block(text);
330                self.start = to_u32(text.len());
331                with_clone(self, |item| {
332                    item.sel.family = FamilySelector::MONOSPACE;
333                })
334                // TODO: within a code block, the last \n should be suppressed?
335            }
336            Tag::HtmlBlock => return Err(Error::NotSupported("embedded HTML")),
337            Tag::List(start) => {
338                state.start_block(text);
339                self.list = start;
340                None
341            }
342            Tag::Item => {
343                state.list_item(text);
344                // NOTE: we use \t for indent, which indents only the first
345                // line. Without better flow control we cannot fix this.
346                match &mut self.list {
347                    Some(x) => {
348                        write!(text, "{x}\t").unwrap();
349                        *x += 1;
350                    }
351                    None => text.push_str("•\t"),
352                }
353                None
354            }
355            Tag::Emphasis => with_clone(self, |item| item.sel.style = FontStyle::Italic),
356            Tag::Strong => with_clone(self, |item| item.sel.weight = FontWeight::BOLD),
357            Tag::Strikethrough => with_clone(self, |item| {
358                item.flags.set(EffectFlags::STRIKETHROUGH, true)
359            }),
360            Tag::BlockQuote(_) => return Err(Error::NotSupported("block quote")),
361            Tag::FootnoteDefinition(_) => return Err(Error::NotSupported("footnote")),
362            Tag::DefinitionList | Tag::DefinitionListTitle | Tag::DefinitionListDefinition => {
363                return Err(Error::NotSupported("definition"));
364            }
365            Tag::Table(_) | Tag::TableHead | Tag::TableRow | Tag::TableCell => {
366                return Err(Error::NotSupported("table"));
367            }
368            Tag::Superscript | Tag::Subscript => {
369                // kas-text doesn't support adjusting the baseline
370                return Err(Error::NotSupported("super/subscript"));
371            }
372            Tag::Link { .. } => return Err(Error::NotSupported("link")),
373            Tag::Image { .. } => return Err(Error::NotSupported("image")),
374            Tag::MetadataBlock(_) => return Err(Error::NotSupported("metadata block")),
375        })
376    }
377    // returns true if stack must be popped
378    fn end_tag(&self, state: &mut State, tag: TagEnd) -> bool {
379        match tag {
380            TagEnd::Paragraph | TagEnd::List(_) => {
381                state.end_block();
382                false
383            }
384            TagEnd::Heading(_) | TagEnd::CodeBlock => {
385                state.end_block();
386                true
387            }
388            TagEnd::Item => false,
389            TagEnd::Emphasis | TagEnd::Strong | TagEnd::Strikethrough => true,
390            tag => unimplemented!("{:?}", tag),
391        }
392    }
393}