ahtml_from_markdown/
markdown.rs

1//! Convert markdown to HTML.
2
3use std::{
4    collections::HashMap,
5    fmt::{Debug, Display},
6    fs::read_to_string,
7    panic::RefUnwindSafe,
8    path::PathBuf,
9    str::Utf8Error,
10    string::FromUtf8Error,
11};
12
13use anyhow::Context;
14use html5gum::{HtmlString, Token};
15use kstring::KString;
16use pulldown_cmark::{Event, HeadingLevel, LinkType, Options, Parser, Tag};
17
18use ahtml::{
19    att, flat::Flat, AId, ASlice, AVec, Element, HtmlAllocator, Node, Print, BLOCKQUOTE_META,
20    DIV_META, EM_META, H1_META, H2_META, H3_META, H4_META, H5_META, H6_META, LI_META, METADB,
21    OL_META, PRE_META, P_META, STRONG_META, S_META, TABLE_META, TD_META, TH_META, TITLE_META,
22    TR_META, UL_META,
23};
24use ahtml_html::meta::ElementMeta;
25
26use chj_util::{nodt as dt, nowarn as warn, nowarn_todo as warn_todo};
27
28use crate::{
29    myfrom::kstring_myfrom2,
30    try_option,
31    util::{autovivify_last, enum_name, infinite_sequence},
32    webutils_simple::email_url,
33};
34
/// Errors reported while converting a markdown document to HTML and
/// while post-processing the metadata extracted from it.
#[derive(thiserror::Error, Debug)]
pub enum MarkdownFileError {
    /// A tag name that is not an HTML5 element name; `is_opening`
    /// selects whether the message talks about an opening or a
    /// closing tag.
    #[error("not an HTML5 tag name in {} tag: {:?}",
            if *is_opening { "opening" } else { "closing" },
            name.as_str())]
    NotAnHTML5TagName { name: KString, is_opening: bool },
    /// A context was closed by something other than what opened it
    /// (produced by `ContextTag::assert_eq`). Both fields hold the
    /// `Display` rendering of the respective context tag.
    #[error("non-balanced tags: <{opening}> ending as </{closing}>")]
    UnbalancedTags { opening: String, closing: String },

    /// A footnote definition was seen for a label that already has a
    /// definition text.
    #[error("multiple definitions of a footnote with the label {label:?}")]
    MultipleFootnoteWithLabel { label: KString },
    /// A footnote was defined but no reference number was ever
    /// assigned to it (reported by
    /// `MarkdownMeta::footnotes_html_fragment`).
    #[error("unused footnote {:?}", label.as_str())]
    UnusedFootnote { label: KString },
    /// A footnote entry exists but has no definition text (reported
    /// by `MarkdownMeta::footnotes_html_fragment`).
    #[error("missing definition for footnote {:?}", label.as_str())]
    MissingFootnoteDefinition { label: KString },

    /// More than one `<title>` element was found in the document.
    #[error("multiple <title> elements")]
    MultipleTitleElements,
    /// The HTML5 tokenizer (`html5gum`) reported `error` while
    /// processing `input`.
    // Really should record the location, not the input!
    #[error("HTML5 parsing error: {error} for {input:?}")]
    HTML5ParsingError {
        error: html5gum::Error,
        input: Box<str>,
    },
    /// `n` contexts were still open when the document ended; `msg`
    /// carries a description of them.
    #[error("{n} non-closed context(s) at end of markdown document: {msg}")]
    NonClosedContexts { n: usize, msg: String },
    /// Shifting a heading by `diff` levels would leave the range
    /// `h1`..`h6` (reported by `ProcessedMarkdown::fixed_html`).
    #[error("can't shift header levels by {diff} because getting out of range")]
    HeaderLevelShiftOutOfRange { diff: i32 },

    /// Any other error, wrapped; converted automatically via `?`.
    #[error("anyhow: {0}")]
    Anyhow(#[from] anyhow::Error),
    /// Invalid UTF-8 found while borrowing bytes as `str`.
    #[error("UTF-8 decoding error: {0}")]
    Utf8Error(#[from] Utf8Error),
    /// Invalid UTF-8 found while converting owned bytes to `String`
    /// (e.g. by `kstring`).
    #[error("UTF-8 decoding error: {0}")]
    FromUtf8Error(#[from] FromUtf8Error),
}
71
72/// This can't be replaced with `att` or the MyFrom trait, because it
73/// can fail.
74fn kstring(s: HtmlString) -> Result<KString, FromUtf8Error> {
75    Ok(KString::from_string(String::from_utf8(s.0)?))
76}
77
78// ------------------------------------------------------------------
79// Formatting parametrization
80
/// Factory for styling contexts; the hook through which callers
/// parametrize how footnotes are rendered (see
/// `MarkdownMeta::footnotes_html_fragment`, which calls
/// `new_context` once per invocation).
pub trait StylingInterface: Send + Sync + RefUnwindSafe {
    /// Create a fresh styling context. The returned box may borrow
    /// from `self` for the lifetime `'c`. `html` is the allocator the
    /// generated nodes live in.
    fn new_context<'c>(
        &'c self,
        html: &HtmlAllocator,
    ) -> anyhow::Result<Box<dyn StylingContextInterface<'c> + 'c>>;
}
87
/// A styling context as handed out by `StylingInterface::new_context`;
/// does the actual formatting of the footnotes section.
pub trait StylingContextInterface<'c> {
    /// Format a single footnote definition.
    ///
    /// `reference` is the number assigned to the footnote,
    /// `backreferences` are the places referring to it (in order of
    /// appearance in the document), and `clean_slice` is the footnote
    /// text as prepared by `MarkdownMeta::footnotes_html_fragment`
    /// (after its `unwrap_element(*P_META, ..)` call). The returned
    /// nodes are appended flat to the body of the footnotes section.
    fn format_footnote_definition(
        &self,
        html: &HtmlAllocator,
        reference: &Footnoteref,
        backreferences: &[Backref],
        clean_slice: &ASlice<Node>,
    ) -> anyhow::Result<Flat<Node>>;

    /// Wrap `body`, the concatenation of all formatted footnote
    /// definitions, into the node representing the whole footnotes
    /// section.
    fn format_footnotes(
        &self,
        body: ASlice<Node>,
        html: &HtmlAllocator,
    ) -> anyhow::Result<AId<Node>>;
}
103
104// ------------------------------------------------------------------
105
106fn elementmeta_from_headinglevel(level: HeadingLevel) -> &'static ElementMeta {
107    match level {
108        HeadingLevel::H1 => *H1_META,
109        HeadingLevel::H2 => *H2_META,
110        HeadingLevel::H3 => *H3_META,
111        HeadingLevel::H4 => *H4_META,
112        HeadingLevel::H5 => *H5_META,
113        HeadingLevel::H6 => *H6_META,
114    }
115}
116
117fn elementmeta_from_num(level: i32) -> Option<&'static ElementMeta> {
118    match level {
119        1 => Some(*H1_META),
120        2 => Some(*H2_META),
121        3 => Some(*H3_META),
122        4 => Some(*H4_META),
123        5 => Some(*H5_META),
124        6 => Some(*H6_META),
125        _ => None,
126    }
127}
128
129// Returning a signed integer so that calculating with differences is
130// easy.
131fn headinglevel_num(level: HeadingLevel) -> i32 {
132    match level {
133        HeadingLevel::H1 => 1,
134        HeadingLevel::H2 => 2,
135        HeadingLevel::H3 => 3,
136        HeadingLevel::H4 => 4,
137        HeadingLevel::H5 => 5,
138        HeadingLevel::H6 => 6,
139    }
140}
141
142fn level_from_elementmeta(meta: &'static ElementMeta) -> Option<i32> {
143    if meta == *H1_META {
144        Some(1)
145    } else if meta == *H2_META {
146        Some(2)
147    } else if meta == *H3_META {
148        Some(3)
149    } else if meta == *H4_META {
150        Some(4)
151    } else if meta == *H5_META {
152        Some(5)
153    } else if meta == *H6_META {
154        Some(6)
155    } else {
156        None
157    }
158}
159
/// Append an anchor name derived from the text `s` to `res`.
///
/// * ASCII letters and digits are appended lowercased,
/// * every run of consecutive whitespace is collapsed into a single `-`,
/// * every other character is replaced by `_`.
///
/// `res` is only appended to, never cleared; callers wanting a fresh
/// name have to clear it first.
fn text_to_anchor(s: &str, res: &mut String) {
    // True while we are inside a run of whitespace for which the `-`
    // has already been emitted.
    let mut last_was_space = false;
    for c in s.chars() {
        if c.is_ascii_alphanumeric() {
            res.push(c.to_ascii_lowercase());
            last_was_space = false;
        } else if c.is_whitespace() {
            if !last_was_space {
                res.push('-');
                // Remember the separator so that further whitespace in
                // the same run does not add more dashes.
                last_was_space = true;
            }
        } else {
            res.push('_');
            last_was_space = false;
        }
    }
}
176
/// A markdown document, identified by the path of its file.
// NOTE(review): the implementation block is outside the part of the
// file visible here; presumably `path` is what gets read and converted.
pub struct MarkdownFile {
    path: PathBuf,
}
180
/// The contents of one heading found in the markdown document.
pub struct MarkdownHeader {
    /// The body of the <hX> element
    html: ASlice<Node>,
    /// Value of the `id` attribute given to the heading element, i.e.
    /// the fragment (without `#`) that TOC links point to.
    anchor_name: KString,
}
186
/// A node in the tree of headings of a document, used to build the
/// table of contents.
pub struct MarkdownHeading {
    /// Original level as per .md document, used for building up (won't
    /// correspond to the HTML any more after fixing that up).
    level: HeadingLevel,
    /// The heading itself; `None` for placeholder nodes that were
    /// created only to hold deeper headings when a level was skipped
    /// in the document.
    header: Option<MarkdownHeader>,
    /// Headings nested below this one, in document order.
    subheadings: Vec<MarkdownHeading>,
}
194
195impl MarkdownHeading {
196    fn append_heading(&mut self, our_level: u32, h: MarkdownHeading) {
197        if h.level as u32 == our_level {
198            self.subheadings.push(h)
199        } else {
200            autovivify_last(&mut self.subheadings, || MarkdownHeading {
201                level: HeadingLevel::try_from(our_level as usize)
202                    .expect("must exist because h.level is yet larger"),
203                header: None,
204                subheadings: Vec::new(),
205            })
206            .append_heading(our_level + 1, h)
207        }
208    }
209
210    fn to_toc_html_fragment(&self, html: &HtmlAllocator) -> anyhow::Result<AId<Node>> {
211        let mut body = html.new_vec();
212        for subheading in &self.subheadings {
213            body.push(subheading.to_toc_html_fragment(html)?)?;
214        }
215        html.dl(
216            [],
217            [
218                if let Some(header) = &self.header {
219                    let mut anchor = String::new(); // cache?
220                    anchor.push_str("#");
221                    anchor.push_str(&header.anchor_name);
222                    html.dt(
223                        [],
224                        [html.a(
225                            [att("href", anchor)],
226                            // Should we actually strip HTML markup?
227                            &header.html,
228                        )?],
229                    )?
230                } else {
231                    html.dt([], [])?
232                },
233                html.dd([], body)?,
234            ],
235        )
236    }
237
238    // Again duplication with method in MarkdownMeta. Stupid. todo clean up?
239    fn top_heading_level(&self) -> Option<HeadingLevel> {
240        if self.header.is_some() {
241            Some(self.level)
242        } else {
243            self.subheadings
244                .iter()
245                .filter_map(|heading| heading.top_heading_level())
246                .max()
247        }
248    }
249}
250
/// Number identifying one place in the text that refers to a
/// footnote; rendered as the anchor name `footnoteref-N` by
/// `to_kstring`.
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd)]
pub struct Backref(pub u32);
253
254impl Backref {
255    pub fn to_kstring(&self, with_hash: bool) -> KString {
256        KString::from_string(format!(
257            "{}footnoteref-{}",
258            if with_hash { "#" } else { "" },
259            self.0
260        ))
261    }
262}
263
/// Number assigned to a footnote; rendered as the anchor name
/// `footnote-N` by `to_kstring`. Footnotes are sorted by this value
/// when the footnotes section is generated.
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd)]
pub struct Footnoteref(pub u32);
266
267impl Footnoteref {
268    pub fn to_kstring(&self, with_hash: bool) -> KString {
269        KString::from_string(format!(
270            "{}footnote-{}",
271            if with_hash { "#" } else { "" },
272            self.0
273        ))
274    }
275}
276
/// Everything known about one footnote label. Entries can be created
/// before their definition text has been seen, hence the `Option`s.
struct FootnoteDefinition {
    /// Number assigned to the footnote; `None` means it was defined
    /// but never given a number, which
    /// `MarkdownMeta::footnotes_html_fragment` reports as unused.
    reference: Option<Footnoteref>,
    /// The converted definition text; `None` while no definition for
    /// the label has been seen.
    text: Option<ASlice<Node>>,
    /// places with references to this definition, in order of
    /// appearance in document
    backreferences: Vec<Backref>,
}
284
/// Metadata collected while converting a markdown document: title,
/// heading hierarchy and footnotes.
pub struct MarkdownMeta {
    /// contents of <title> tag only (deriving from headers happens
    /// outside)
    title: Option<ASlice<Node>>,
    /// The heading tree; the entries at this level are of level `H1`
    /// (possibly header-less placeholders holding deeper headings).
    headings: Vec<MarkdownHeading>,
    /// footnote label to definition
    footnotes: HashMap<KString, FootnoteDefinition>,
}
293impl MarkdownMeta {
294    fn new() -> MarkdownMeta {
295        MarkdownMeta {
296            title: None,
297            headings: Vec::new(),
298            footnotes: HashMap::new(),
299        }
300    }
301
302    fn push_heading(&mut self, h: MarkdownHeading) {
303        match h.level {
304            HeadingLevel::H1 => self.headings.push(h),
305            _ => autovivify_last(&mut self.headings, || MarkdownHeading {
306                level: HeadingLevel::H1,
307                header: None,
308                subheadings: Vec::new(),
309            })
310            .append_heading(2, h),
311        }
312    }
313
314    // Stupid modified copy-paste[aha, from to_toc_html_fragment],
315    // "todo really unify the two DS;
316    // actually easy just None header ?"-- but now ~happy
317    // with it, OK? &Vec<MarkdownHeading> is now the thing to be generic on?
318    // Alright, should then do function on *that* ^, todo?
319    pub fn toc_html_fragment(&self, html: &HtmlAllocator) -> anyhow::Result<AId<Node>> {
320        let headings = self.title_and_remaining_headings().1;
321        let mut body = html.new_vec();
322        for subheading in headings {
323            body.push(subheading.to_toc_html_fragment(html)?)?;
324        }
325        // Using `div` here instead of `dl` is wrong in that multiple
326        // toplevel entries will be separate now. But what would the
327        // `dt`? Empty? It would indent the `dd` holding `body`. Do it
328        // iff there are >1 body nodes? Perennial question about what
329        // '#' header should mean in Markdown.
330        if true {
331            html.div([att("class", "toc_wrapper")], body)
332        } else {
333            html.dl([], [html.dt([], [])?, html.dd([], body)?])
334        }
335    }
336
337    // XX why not just preserialize the individual footnote
338    // definitions, and leave formatting of the rest to blog.rs?
339    // Checking for missing definitions should perhaps still be done
340    // in markdown.rs, though.
341    pub fn footnotes_html_fragment(
342        &self,
343        html: &HtmlAllocator,
344        style: &dyn StylingInterface,
345    ) -> Result<(usize, AId<Node>), MarkdownFileError> {
346        let mut footnotes: Vec<_> = self.footnotes.iter().collect();
347        footnotes.sort_by_key(|f| f.1.reference);
348        // dbg!(&footnotes);
349
350        let context = style.new_context(html)?;
351        let mut body = html.new_vec();
352        for (label, fnd) in &footnotes {
353            let reference = fnd
354                .reference
355                .ok_or_else(|| MarkdownFileError::UnusedFootnote {
356                    label: (*label).clone(),
357                })?;
358            let slice = fnd
359                .text
360                .ok_or_else(|| MarkdownFileError::MissingFootnoteDefinition {
361                    label: (*label).clone(),
362                })?;
363            let clean_slice = slice.unwrap_element(*P_META, true, html);
364            body.push_flat(context.format_footnote_definition(
365                html,
366                &reference,
367                &fnd.backreferences,
368                &clean_slice,
369            )?)?;
370        }
371        Ok((
372            footnotes.len(),
373            context.format_footnotes(body.as_slice(), html)?,
374        ))
375    }
376
377    /// Split title/header hierarchy into title and rest; takes
378    /// `<title>` if available by preference, otherwise the first
379    /// heading if it's a '#' and there are no other '#' ones. The
380    /// last returned value is true if a heading from the markdown
381    /// file was skipped (i.e. it needs to be dropped from the
382    /// generated HTML to avoid header duplication).
383    pub fn title_and_remaining_headings(
384        &self,
385    ) -> (Option<&ASlice<Node>>, &Vec<MarkdownHeading>, bool) {
386        if let Some(title) = &self.title {
387            (Some(title), &self.headings, false)
388        } else {
389            if let Some(header) = try_option! {
390                if self.headings.len() != 1 { return None; }
391                self.headings[0].header.as_ref()
392            } {
393                (Some(&header.html), &self.headings[0].subheadings, true)
394            } else {
395                (None, &self.headings, false)
396            }
397        }
398    }
399
400    /// The contents of an optional single `<title>` element,
401    /// or if missing, the first heading if it's a
402    /// '#' and there are no other '#' ones.
403    pub fn title(&self) -> Option<&ASlice<Node>> {
404        self.title_and_remaining_headings().0
405    }
406
407    /// Like `title` but as a string with markup stripped, and falling
408    /// back to `alternative` if not present.
409    pub fn title_string(&self, html: &HtmlAllocator, alternative: &str) -> anyhow::Result<KString> {
410        if let Some(sl) = self.title() {
411            let mut v = String::new();
412            sl.print_plain(&mut v, html)?;
413            Ok(KString::from_string(v))
414        } else {
415            Ok(KString::from_ref(alternative))
416        }
417    }
418
419    fn top_heading_level(&self) -> Option<HeadingLevel> {
420        self.headings
421            .iter()
422            .filter_map(|heading| heading.top_heading_level())
423            .max()
424    }
425}
426
/// The result of processing a markdown file.
///
/// The contained node id is only meaningful together with the
/// `HtmlAllocator` that was used for the conversion.
pub struct ProcessedMarkdown {
    /// Conversion to html of the text, with the original heading
    /// levels translated to identical HTML levels (may need fixing up
    /// before serving).
    html: AId<Node>,
    /// Metadata extracted also during the conversion.
    meta: MarkdownMeta,
}
436
impl ProcessedMarkdown {
    /// The converted document as is, i.e. with the heading levels of
    /// the markdown source (see `fixed_html` for the adjusted
    /// variant).
    pub fn html(&self) -> AId<Node> {
        self.html
    }
    /// The metadata extracted during the conversion.
    pub fn meta(&self) -> &MarkdownMeta {
        &self.meta
    }

    /// The converted document with its headings adjusted for
    /// embedding below a separately rendered title.
    ///
    /// * If no title can be derived, or the document has no headings,
    ///   or no shift is needed, the original node is returned.
    /// * If the title was taken from the single `H1` heading of the
    ///   document, `<h1>` elements are dropped from the body.
    /// * Otherwise heading elements are replaced by copies shifted by
    ///   the difference between level 2 and the document's top
    ///   heading level.
    ///
    /// `html` must be the allocator the document was created with.
    ///
    /// # Errors
    ///
    /// `MarkdownFileError::HeaderLevelShiftOutOfRange` (wrapped in
    /// `anyhow`) if a shifted level falls outside 1..=6; allocator
    /// errors.
    ///
    /// # Panics
    ///
    /// If a node id cannot be resolved in `html`.
    // NOTE(review): presumably `try_filter_map_body` only visits the
    // direct children of the root element, so headings nested deeper
    // would not be touched -- confirm.
    pub fn fixed_html(&self, html: &HtmlAllocator) -> anyhow::Result<AId<Node>> {
        // Which is the top level we *want*?
        let (opt_title, _heading, do_drop_h1) = self.meta.title_and_remaining_headings();
        dt!(&format!(
            "fixed_html {:?}",
            opt_title.map_or_else(|| Ok(String::from("(no title)")), |t| t.to_string(html))
        ));
        // We want to either drop H1 in the document and not shift
        // anything (because H1 existed and was the only H1 header,
        // after dropping it the next level can only be H2 or less and
        // we leave it at what remains), or, shift them if necessary
        // so that the top level becomes H2. Unless it couldn't
        // extract a title, in which case we leave the document
        // untouched.
        if opt_title.is_none() {
            warn!("no title could be derived");
            return Ok(self.html);
        }
        // Per-node transformation applied to the body of the root
        // element: `Ok(None)` drops the node, `Ok(Some(id))` keeps or
        // replaces it.
        let fixup: Box<dyn Fn(_) -> _> = if do_drop_h1 {
            warn!("do_drop_h1");
            Box::new(|id: AId<Node>| -> anyhow::Result<Option<AId<Node>>> {
                let node = html.get_node(id).expect("correct HtmlAllocator");
                if let Some(elt) = node.as_element() {
                    if elt.meta() == *H1_META {
                        // The heading that became the title.
                        Ok(None)
                    } else {
                        Ok(Some(id))
                    }
                } else {
                    Ok(Some(id))
                }
            })
        } else {
            if let Some(top_level_have) = self.meta.top_heading_level() {
                let top_level_want = 2; // HeadingLevel::H2;
                let diff = top_level_want - headinglevel_num(top_level_have);
                warn!("diff = {diff}");
                if diff == 0 {
                    return Ok(self.html);
                }
                Box::new(move |id: AId<Node>| -> anyhow::Result<Option<AId<Node>>> {
                    let node = html.get_node(id).expect("correct HtmlAllocator");
                    if let Some(elt) = node.as_element() {
                        if let Some(lvl) = level_from_elementmeta(elt.meta()) {
                            // A heading element: re-create it with the
                            // shifted level, same attributes and body.
                            let lvl2 = lvl + diff;
                            let meta2 = elementmeta_from_num(lvl2).ok_or_else(|| {
                                MarkdownFileError::HeaderLevelShiftOutOfRange { diff }
                            })?;
                            let elt2 = Element {
                                meta: meta2,
                                attr: elt.attr().clone(),
                                body: elt.body().clone(),
                            };
                            Ok(Some(html.allocate_element(elt2)?))
                        } else {
                            Ok(Some(id))
                        }
                    } else {
                        Ok(Some(id))
                    }
                })
            } else {
                warn!("no headings, thus noop");
                return Ok(self.html);
            }
        };

        let node2 = {
            let elt = {
                let node = html.get_node(self.html).expect(
                    "ProcessedMarkdown to be used with the same HtmlAllocator it was created with",
                );
                // Bummer, Element is quite large (5 words?), but we have
                // to free up the borrow from get_node because
                // try_filter_map_body needs a writable one.
                (*node.try_element()?).clone()
            };
            elt.try_filter_map_body::<Node>(fixup, html)?
        };
        Ok(html.allocate_element(node2)?)
    }
}
527
528// Internals for impl MarkdownFile:
529
/// What opened a context frame during conversion; compared against
/// the closing event to detect unbalanced nesting.
#[derive(Debug)]
enum ContextTag<'t> {
    /// Opened by a markdown parser event.
    Markdown(Tag<'t>),
    /// Opened by an HTML element, identified by its metadata.
    Html(&'static ElementMeta),
}
535
536impl<'t> Display for ContextTag<'t> {
537    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
538        match self {
539            ContextTag::Markdown(tag) => {
540                f.write_fmt(format_args!("Markdown {:?} scope", enum_name(tag)))
541            }
542            ContextTag::Html(meta) => {
543                f.write_fmt(format_args!("HTML {:?} element", meta.tag_name.as_str()))
544            }
545        }
546    }
547}
548
549impl<'t> PartialEq for ContextTag<'t> {
550    fn eq(&self, other: &Self) -> bool {
551        match (self, other) {
552            (ContextTag::Markdown(a), ContextTag::Markdown(b)) => a == b,
553            (ContextTag::Html(a), ContextTag::Html(b)) => std::ptr::eq(*a, *b),
554            _ => false,
555        }
556    }
557}
558
559impl<'t> ContextTag<'t> {
560    fn assert_eq(&self, closing: &ContextTag) -> Result<(), MarkdownFileError> {
561        if *self == *closing {
562            Ok(())
563        } else {
564            Err(MarkdownFileError::UnbalancedTags {
565                opening: self.to_string(),
566                closing: closing.to_string(),
567            })
568        }
569    }
570}
571
/// One entry of the context stack used during conversion: collects
/// the attributes and body of an element that is still open.
struct ContextFrame<'a, 't> {
    /// What opened this frame; verified when the frame is closed.
    tag: ContextTag<'t>,
    // meta: &'static ElementMeta, -- no, given ad-hoc on closing
    // event.
    /// Attributes collected so far for the element being built.
    atts: AVec<'a, (KString, KString)>,
    /// Child nodes collected so far for the element being built.
    body: AVec<'a, Node>,
    last_footnote_reference: Option<u32>, // last index into body holding one
}
580
581/// Convert to HTML, and capture metainformation to allow for
582/// creation of TOC and footnotes section.
583pub fn markdown_to_html(
584    s: &str,
585    html: &HtmlAllocator,
586) -> Result<ProcessedMarkdown, MarkdownFileError> {
587    let mut options = Options::empty();
588    options.insert(Options::ENABLE_TABLES);
589    options.insert(Options::ENABLE_FOOTNOTES);
590    options.insert(Options::ENABLE_STRIKETHROUGH);
591    options.insert(Options::ENABLE_TASKLISTS);
592    options.insert(Options::ENABLE_SMART_PUNCTUATION); // XX config
593    options.insert(Options::ENABLE_HEADING_ATTRIBUTES);
594
595    let mut parser = Parser::new_ext(&s, options);
596
597    // Context
598    let mut _context: Vec<ContextFrame> = Vec::new();
599    let mut context = &mut _context;
600    // Push a base frame (wrapper around everything):
601    context.push(ContextFrame {
602        tag: ContextTag::Markdown(Tag::Paragraph), // fake
603        atts: AVec::new(html),
604        body: AVec::new(html),
605        last_footnote_reference: None,
606    });
607    macro_rules! new_contextframe {
608        ($tag:expr) => {
609            ContextFrame {
610                tag: $tag,
611                atts: AVec::new(html),
612                body: AVec::new(html),
613                last_footnote_reference: None,
614            }
615        };
616    }
617
618    // Opening a context
619    macro_rules! mdopen {
620        ($tag:expr) => {
621            context.push(new_contextframe!(ContextTag::Markdown($tag)))
622        };
623    }
624
625    // Closing a context
626    let frame_to_element =
627        |frame: ContextFrame, meta: &'static ElementMeta| -> Result<AId<Node>, MarkdownFileError> {
628            Ok(html.new_element(meta, frame.atts.as_slice(), frame.body.as_slice())?)
629        };
630    let close = |context: &mut Vec<ContextFrame>,
631                 tag: ContextTag,
632                 meta: &'static ElementMeta|
633     -> Result<(), MarkdownFileError> {
634        let frame = context.pop().expect("start before end");
635        frame.tag.assert_eq(&tag)?;
636        let outerframe = context.last_mut().expect("at least base frame");
637        outerframe.body.push(frame_to_element(frame, meta)?)?;
638        Ok(())
639    };
640    macro_rules! mdclose {
641        ($tag:expr, $meta:expr) => {
642            close(&mut context, ContextTag::Markdown($tag), $meta)
643        };
644    }
645    // Alternative approach:
646    macro_rules! pop {
647        ($tag:expr) => {{
648            // XX minimize code via local function
649            let frame = context.pop().expect("start before end");
650            frame.tag.assert_eq(&$tag)?;
651            let outerframe = context.last_mut().expect("at least base frame");
652            (frame.atts, frame.body, outerframe)
653        }};
654    }
655    macro_rules! mdpop {
656        ($tag:expr) => {
657            pop!(ContextTag::Markdown($tag))
658        };
659    }
660
661    macro_rules! current_frame {
662        () => {
663            context
664                .last_mut()
665                .expect("At least base frame; at least bug in markdown lib?")
666        };
667    }
668
669    let mut markdownmeta = MarkdownMeta::new();
670    // let mut current_heading = None;
671    let mut anchor_name = String::new();
672    let mut tmp = String::new();
673    // Anchor names to number of uses, acting as id
674    let mut anchor_names: HashMap<KString, u32> = HashMap::new();
675
676    let mut next_footnote_number = infinite_sequence(1, 1);
677    let mut next_footnote_backreference = infinite_sequence(1, 1);
678
679    while let Some(item) = parser.next() {
680        match item {
681            Event::Start(x) => match x {
682                Tag::Paragraph => mdopen!(Tag::Paragraph),
683                Tag::Heading(level, fragmentid, classes) => {
684                    mdopen!(Tag::Heading(level, fragmentid, classes))
685                }
686                Tag::BlockQuote => mdopen!(Tag::BlockQuote),
687                Tag::CodeBlock(kind) => mdopen!(Tag::CodeBlock(kind)),
688                Tag::List(firstitemnum) => mdopen!(Tag::List(firstitemnum)),
689                Tag::Item => mdopen!(Tag::Item),
690                Tag::FootnoteDefinition(label) => mdopen!(Tag::FootnoteDefinition(label)),
691                Tag::Table(alignments) => mdopen!(Tag::Table(alignments)),
692                Tag::TableHead => mdopen!(Tag::TableHead),
693                Tag::TableRow => mdopen!(Tag::TableRow),
694                Tag::TableCell => mdopen!(Tag::TableCell),
695                Tag::Emphasis => mdopen!(Tag::Emphasis),
696                Tag::Strong => mdopen!(Tag::Strong),
697                Tag::Strikethrough => mdopen!(Tag::Strikethrough),
698                Tag::Link(linktype, url, title) => mdopen!(Tag::Link(linktype, url, title)),
699                Tag::Image(linktype, url, title) => mdopen!(Tag::Image(linktype, url, title)),
700            },
701            Event::End(x) => match x {
702                Tag::Paragraph => mdclose!(Tag::Paragraph, *P_META)?,
703                Tag::Heading(level, fragmentid, classes) => {
704                    {
705                        // Store generated HTML for this
706                        // heading in markdownmeta, too,
707                        // and add a reference to the html
708                        // element in the body.
709                        let frame = current_frame!();
710                        let bodyslice = frame.body.as_slice();
711                        tmp.clear();
712                        for node in bodyslice.iter_node(html) {
713                            node.print_plain(&mut tmp, html)?;
714                        }
715                        anchor_name.clear();
716                        text_to_anchor(&tmp, &mut anchor_name);
717
718                        // Append number if necessary to avoid conflicts
719                        // (XX should actually do a check like this on the whole
720                        // generated page (uh, preserialized parts!))
721                        let anchor_name_kstr;
722                        'search: loop {
723                            // loop bc labels on blocks are unstable
724                            for _ in 0..10 {
725                                if let Some(counter) = anchor_names.get_mut(&*anchor_name) {
726                                    *counter += 1;
727                                    anchor_name.push_str(&format!("-{}", *counter));
728                                } else {
729                                    anchor_name_kstr = KString::from(&anchor_name);
730                                    anchor_names.insert(anchor_name_kstr.clone(), 1);
731                                    break 'search;
732                                }
733                            }
734                            warn!(
735                                "more than 10 *levels* of conflicts trying to find \
736                                       unallocated name; leaving it conflicting"
737                            );
738                            anchor_name_kstr = KString::from(&anchor_name);
739                            break;
740                        }
741
742                        frame.atts.push(
743                            // XX Should offer an `attribute`
744                            // method that accepts 2 arguments
745                            // which are ToKString. clone should
746                            // be faster than from_str.
747                            html.attribute("id", anchor_name_kstr.as_str())?,
748                        )?;
749
750                        markdownmeta.push_heading(MarkdownHeading {
751                            level,
752                            header: Some(MarkdownHeader {
753                                html: bodyslice,
754                                anchor_name: anchor_name_kstr,
755                            }),
756                            subheadings: Vec::new(),
757                        });
758                    }
759
760                    let meta = elementmeta_from_headinglevel(level);
761                    // XX todo: handle fragmentid, classes
762                    mdclose!(Tag::Heading(level, fragmentid, classes), meta)?
763                }
764                Tag::BlockQuote => mdclose!(Tag::BlockQuote, *BLOCKQUOTE_META)?,
765                Tag::CodeBlock(kind) =>
766                // XX kind -> class="language-xxx", and do highlighting
767                {
768                    mdclose!(Tag::CodeBlock(kind), *PRE_META)?
769                }
770
771                Tag::List(firstitemnum) => mdclose!(
772                    Tag::List(firstitemnum),
773                    if firstitemnum.is_some() {
774                        *OL_META
775                    } else {
776                        *UL_META
777                    }
778                )?,
779                Tag::Item => mdclose!(Tag::Item, *LI_META)?,
780                Tag::FootnoteDefinition(label) => {
781                    // A footnote definition. The value contained is the footnote's
782                    // label by which it can be referred to.
783                    let frame = context.pop().expect("start before end");
784                    if let Some(FootnoteDefinition {
785                        text: footnote_text,
786                        ..
787                    }) = markdownmeta.footnotes.get_mut(&*label)
788                    {
789                        if let Some(_) = footnote_text {
790                            return Err(MarkdownFileError::MultipleFootnoteWithLabel {
791                                label: KString::from_ref(&*label),
792                            });
793                        } else {
794                            *footnote_text = Some(frame.body.as_slice());
795                            // XX what about atts?
796                        }
797                    } else {
798                        // Definition before first use
799                        markdownmeta.footnotes.insert(
800                            KString::from_ref(&*label),
801                            FootnoteDefinition {
802                                reference: None,
803                                text: Some(frame.body.as_slice()),
804                                backreferences: Vec::new(),
805                            },
806                        );
807                    }
808                }
809                Tag::Table(alignments) => mdclose!(
810                    Tag::Table(alignments),
811                    // XX todo: handle alignments
812                    *TABLE_META
813                )?,
814                Tag::TableHead => mdclose!(Tag::TableHead, *TH_META)?,
815                Tag::TableRow => mdclose!(Tag::TableRow, *TR_META)?,
816                Tag::TableCell => mdclose!(Tag::TableCell, *TD_META)?,
817                Tag::Emphasis => mdclose!(Tag::Emphasis, *EM_META)?,
818                Tag::Strong => mdclose!(Tag::Strong, *STRONG_META)?,
819                Tag::Strikethrough => mdclose!(Tag::Strikethrough, *S_META)?,
820                Tag::Link(linktype, url, title) => {
821                    let (mut atts, body, outerframe) = mdpop!(
822                        // XX uh, need to clone just to verify. better?
823                        Tag::Link(linktype, url.clone(), title)
824                    );
825
826                    let elt = match linktype {
827                        // Inline link like `[foo](bar)`
828                        LinkType::Inline => {
829                            atts.push(html.attribute("href", kstring_myfrom2(url))?)?;
830                            html.a(atts, body)
831                        }
832                        // Reference link like `[foo][bar]`
833                        LinkType::Reference => {
834                            warn_todo!(
835                                "LinkType::Reference: \
836                                            url, presumably?"
837                            );
838                            atts.push(html.attribute("href", kstring_myfrom2(url))?)?;
839                            html.a(atts, body)
840                        }
841                        // Reference without destination in
842                        // the document, but resolved by the
843                        // broken_link_callback
844                        LinkType::ReferenceUnknown => todo!(),
845                        // Collapsed link like `[foo][]`
846                        LinkType::Collapsed => todo!(),
847                        // Collapsed link without destination
848                        // in the document, but resolved by
849                        // the broken_link_callback
850                        LinkType::CollapsedUnknown => todo!(),
851                        // Shortcut link like `[foo]`
852                        LinkType::Shortcut => {
853                            warn_todo!(
854                                "LinkType::Shortcut: need to build \
855                                            index and look up"
856                            );
857                            atts.push(html.attribute("href", kstring_myfrom2(url))?)?;
858                            html.a(atts, body)
859                        }
860                        // Shortcut without destination in the
861                        // document, but resolved by the
862                        // broken_link_callback
863                        LinkType::ShortcutUnknown => todo!(),
864                        // Autolink like `<http://foo.bar/baz>`
865                        LinkType::Autolink => html.a([att("href", kstring_myfrom2(url))], body),
866                        // Email address in autolink like `<john@example.org>`
867                        LinkType::Email => html.a([att("href", email_url(&url))], body),
868                    };
869                    outerframe.body.push(elt?)?;
870                }
871                Tag::Image(linktype, url, title) =>
872                // Oh, almost COPYPASTE of Tag::Link
873                {
874                    let (mut atts, body, outerframe) = mdpop!(
875                        // XX uh, need to clone just to verify. better?
876                        Tag::Link(linktype, url.clone(), title)
877                    );
878                    let elt = match linktype {
879                        LinkType::Inline => {
880                            atts.push(html.attribute("src", kstring_myfrom2(url))?)?;
881                            html.img(atts, body)
882                        }
883                        LinkType::Reference => todo!(),
884                        LinkType::ReferenceUnknown => todo!(),
885                        LinkType::Collapsed => todo!(),
886                        LinkType::CollapsedUnknown => todo!(),
887                        LinkType::Shortcut => todo!(),
888                        LinkType::ShortcutUnknown => todo!(),
889                        LinkType::Autolink => todo!(),
890                        LinkType::Email => todo!(),
891                    };
892                    outerframe.body.push(elt?)?;
893                }
894            },
895            Event::Text(s) => {
896                let frame = current_frame!();
897                frame.body.push(html.str(&s)?)?;
898            }
899            Event::Code(s) => {
900                warn!("Event::Code({:?})", &*s);
901                let frame = current_frame!();
902                let elt = html.code([], [html.str(&s)?])?;
903                frame.body.push(elt)?;
904            }
905            Event::Html(s) => {
906                // I don't really want to put it all in here. This
907                // function is horribly long. But working with
908                // closures and hygienic macros in a way to re-use
909                // them, move them outside, is too painful for me
910                // right now, so I go.
911                dt!(&format!("Event::Html({s:?})"));
912                for token in html5gum::Tokenizer::new(&*s).infallible() {
913                    match token {
914                        Token::StartTag(starttag) => {
915                            let name: &str = std::str::from_utf8(&**starttag.name)?;
916                            let meta = METADB.elementmeta.get(name).ok_or_else(|| {
917                                MarkdownFileError::NotAnHTML5TagName {
918                                    name: KString::from_ref(name),
919                                    is_opening: true,
920                                }
921                            })?;
922                            let mut newframe = new_contextframe!(ContextTag::Html(meta));
923                            for (k, v) in starttag.attributes {
924                                newframe
925                                    .atts
926                                    .push(html.attribute(kstring(k)?, kstring(v)?)?)?;
927                            }
928                            if starttag.self_closing || !meta.has_closing_tag {
929                                let cf = current_frame!();
930                                // XX give context to errors,
931                                // e.g. invalid attribute because,
932                                // where was the element coming
933                                // from? Or utf-8 conversion errors above, too.
934                                cf.body.push(frame_to_element(newframe, meta)?)?;
935                            } else {
936                                context.push(newframe);
937                            }
938                        }
939                        Token::EndTag(endtag) => {
940                            let name: &str = std::str::from_utf8(&**endtag.name)?;
941                            let meta = METADB.elementmeta.get(name).ok_or_else(|| {
942                                MarkdownFileError::NotAnHTML5TagName {
943                                    name: KString::from_ref(name),
944                                    is_opening: false,
945                                }
946                            })?;
947                            if meta.has_closing_tag {
948                                let (atts, body, outerframe) =
949                                    // XX error context. if only I had
950                                    // location info? sigh?
951                                    pop!(ContextTag::Html(meta));
952                                // Special HTML tag treatments
953                                if meta == *TITLE_META {
954                                    if markdownmeta.title.is_some() {
955                                        return Err(MarkdownFileError::MultipleTitleElements);
956                                    }
957                                    markdownmeta.title = Some(body.as_slice());
958                                    // XX dropping atts OK?
959                                } else {
960                                    outerframe.body.push(html.new_element(
961                                        meta,
962                                        atts.as_slice(),
963                                        body.as_slice(),
964                                    )?)?;
965                                }
966                            } else {
967                                // NOOP, we haven't made a frame for it.
968                            }
969                        }
970                        Token::String(s) => {
971                            let frame = current_frame!();
972                            frame.body.push(html.kstring(kstring(s)?)?)?;
973                        }
974                        Token::Comment(_s) => {
975                            // This happens only when <!-- and -->
976                            // appear in the same markdown event,
977                            // i.e. in the same paragraph.  todo:
978                            // do something with _s?
979                        }
980                        Token::Doctype(_) => todo!(),
981                        Token::Error(e) => {
982                            if s.starts_with("<!--") {
983                                // XX how to check `e` ? Should verify it's "eof-in-comment"
984                                // let newframe = new_contextframe!(
985                                //     ContextTag::HtmlComment);
986                                // context.push(newframe);
987
988                                // No, slurp up markdown
989                                // events right here until -->
990                                // appears.
991                                while let Some(item) = parser.next() {
992                                    match item {
993                                        Event::Html(s) => {
994                                            if s.starts_with("-->") {
995                                                break;
996                                            }
997                                        }
998                                        _ => (),
999                                    }
1000                                }
1001                            } else {
1002                                return Err(MarkdownFileError::HTML5ParsingError {
1003                                    error: e,
1004                                    input: s.as_ref().into(),
1005                                });
1006                            }
1007                        }
1008                    }
1009                }
1010            }
1011            Event::FootnoteReference(label) => {
1012                // "A reference to a footnote with given label, which may or may
1013                // not be defined by an event with a `Tag::FootnoteDefinition`
1014                // tag. Definitions and references to them may occur in any
1015                // order."
1016                let backref = Backref(next_footnote_backreference());
1017                let reference = if let Some(fnd) = markdownmeta.footnotes.get_mut(&*label) {
1018                    let reference = if let Some(reference) = fnd.reference {
1019                        reference
1020                    } else {
1021                        let reference = Footnoteref(next_footnote_number());
1022                        fnd.reference = Some(reference);
1023                        reference
1024                    };
1025                    fnd.backreferences.push(backref.clone());
1026                    reference
1027                } else {
1028                    let reference = Footnoteref(next_footnote_number());
1029                    markdownmeta.footnotes.insert(
1030                        KString::from_ref(&*label),
1031                        FootnoteDefinition {
1032                            reference: Some(reference),
1033                            text: None,
1034                            backreferences: vec![backref.clone()],
1035                        },
1036                    );
1037                    reference
1038                };
1039
1040                let frame = current_frame!();
1041                if let Some(i) = frame.last_footnote_reference {
1042                    if i == frame.body.len() {
1043                        // Separate the new reference from the
1044                        // last reference; todo?: ideally the 3
1045                        // `sup` would be merged.
1046                        frame.body.push(html.sup([], [html.str(",")?])?)?;
1047                    }
1048                }
1049                frame.body.push(html.sup(
1050                    [att("id", backref.to_kstring(false))],
1051                    [html.a(
1052                        [att("href", reference.to_kstring(true))],
1053                        [html.string(reference.0.to_string())?],
1054                    )?],
1055                )?)?;
1056                frame.last_footnote_reference = Some(frame.body.len());
1057            }
1058            Event::SoftBreak => {
1059                // a single \n in the input
1060                let frame = current_frame!();
1061                frame.body.push(html.str("\n")?)?;
1062            }
1063            Event::HardBreak => {
1064                // "  \n" in the input
1065                let frame = current_frame!();
1066                frame.body.push(html.br([], [])?)?;
1067            }
1068            Event::Rule => {
1069                let frame = current_frame!();
1070                frame.body.push(html.hr([], [])?)?;
1071            }
1072            Event::TaskListMarker(checked) => {
1073                let frame = current_frame!();
1074                let mut atts = html.new_vec();
1075                atts.push(html.attribute("type", "checkbox")?)?;
1076                atts.push(html.attribute("disabled", "")?)?;
1077                if checked {
1078                    atts.push(html.attribute("checked", "")?)?;
1079                }
1080                frame.body.push(html.input(atts, [])?)?;
1081            }
1082        }
1083    }
1084
1085    match context.len() {
1086        0 => panic!("top-level context was dropped -- should be impossible?"),
1087        1 => (),
1088        n => {
1089            return Err(MarkdownFileError::NonClosedContexts {
1090                n: n - 1,
1091                msg: context[1..]
1092                    .iter()
1093                    .map(|c| c.tag.to_string())
1094                    .collect::<Vec<String>>()
1095                    .join(", "),
1096            })
1097        }
1098    }
1099    let baseframe = context.pop().unwrap();
1100    Ok(ProcessedMarkdown {
1101        html: frame_to_element(baseframe, *DIV_META)?,
1102        meta: markdownmeta,
1103    })
1104}
1105
1106// (No point for this type, really, only holds the path for one method
1107// call.)
1108impl MarkdownFile {
1109    pub fn new(path: PathBuf) -> MarkdownFile {
1110        MarkdownFile { path }
1111    }
1112
1113    pub fn path(&self) -> &PathBuf {
1114        &self.path
1115    }
1116
1117    pub fn process_to_html(
1118        &self,
1119        html: &HtmlAllocator,
1120    ) -> Result<ProcessedMarkdown, MarkdownFileError> {
1121        // `Parser` is NOT supporting streaming. For reasons of
1122        // shining in (superficial) performance bencharks?
1123        // XX impose a size limit on the markdown file here?
1124        let s = read_to_string(&self.path)
1125            .with_context(|| anyhow::anyhow!("can't read file {:?}", self.path))?;
1126
1127        markdown_to_html(&s, html)
1128    }
1129}